## This notebook creates the table '../group_llr_dv_tbl.ecsv'

In [None]:
from os import path

# Third-party
from astropy.table import Table
import astropy.coordinates as coord
import astropy.units as u
from astropy.constants import G, c
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import numpy as np
plt.style.use('apw-notebook')
%matplotlib inline
from scipy.stats import scoreatpercentile

import corner
import emcee
from scipy.integrate import quad
from scipy.misc import logsumexp
import schwimmbad

from gwb.data import TGASData
from gwb.fml import ln_H1_FML, ln_H2_FML

from comoving_rv.log import logger
from comoving_rv.db import Session, Base, db_connect
from comoving_rv.db.model import (Run, Observation, TGASSource, SimbadInfo, PriorRV,
                                  SpectralLineInfo, SpectralLineMeasurement, RVMeasurement)

In [None]:
# Root directory of the project data; the SQLite database file is expected
# directly inside it.
base_path = '/Volumes/ProjectData/gaia-comoving-followup/'
db_path = path.join(base_path, 'db.sqlite')
# NOTE(review): the value returned by db_connect is stored but never passed to
# Session() below -- presumably db_connect configures the session factory as a
# side effect; confirm in comoving_rv.db.
engine = db_connect(db_path)
session = Session()

In [None]:
class Worker(object):
    """Bundle the settings shared by the two FML calls made per star pair.

    Parameters
    ----------
    Vinv
        Passed through ``np.array`` and forwarded as the ``Vinv`` keyword.
    n_distance_samples
        Forwarded unchanged as the ``n_dist_samples`` keyword.
    prior_weights
        Forwarded unchanged as the ``prior_weights`` keyword.
    """

    def __init__(self, Vinv, n_distance_samples, prior_weights):
        # Only Vinv is coerced (and therefore copied) into an ndarray; the
        # other two settings are stored exactly as given.
        self.n_distance_samples = n_distance_samples
        self.prior_weights = prior_weights
        self.Vinv = np.array(Vinv)

    def work(self, star1, star2, v_scatter):
        """Return ``(ln_H1_FML(...), ln_H2_FML(...))`` for the given pair.

        Both functions receive the same two stars and the same keyword
        arguments; ``ln_H1_FML`` is evaluated first.
        """
        shared_kwargs = dict(Vinv=self.Vinv,
                             v_scatter=v_scatter,
                             n_dist_samples=self.n_distance_samples,
                             prior_weights=self.prior_weights)
        ln_h1 = ln_H1_FML(star1, star2, **shared_kwargs)
        ln_h2 = ln_H2_FML(star1, star2, **shared_kwargs)
        return ln_h1, ln_h2

In [None]:
# MAGIC NUMBERs
n_distance_samples = 128
v_scatter = 1.  # km/s
prior_weights = np.array([0.3, 0.55, 0.15])

# 3x3 inverse variances for gaussian prior on velocities: one isotropic
# matrix, identity / sigma**2, per mixture component (stacked to shape
# (3, 3, 3)), listed in the same order as prior_weights.
Vinvs = np.array([np.eye(3) / sigma**2 for sigma in (15., 30., 50.)])

worker = Worker(Vinv=Vinvs,
                n_distance_samples=n_distance_samples,
                prior_weights=prior_weights)

In [None]:
# Distinct group ids attached to observations, keeping only ids that are set,
# strictly positive, and not equal to 10.
group_ids = np.array([
    gid
    for (gid,) in session.query(Observation.group_id).distinct().all()
    if gid is not None and gid > 0 and gid != 10
])

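From inspecting the quality of groups with repeat observations, these are the observations to use for each such group (keys are group IDs; values are the indices of the two observations to pair):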

In [None]:
# group id -> indices (into that group's list of observations) of the two
# observations to pair up. Entries are sorted by group id for easier lookup.
multi_obs = {
    942: [2, 3],
    1397: [1, 2],
    1497: [0, 1],
    1617: [0, 1],
    1942: [0, 1],
    1958: [0, 1],
    1992: [0, 1],
    2308: [2, 3],
    2499: [1, 2],
    2633: [0, 1],
    3196: [0, 1],
    3230: [0, 1],
    3245: [0, 1],
    3287: [0, 2],
    3376: [2, 3],
    3455: [0, 1],
    3580: [0, 2],
    4373: [1, 2],
    4399: [0, 1],
}

In [None]:
# Build the per-pair table: one row per group with the point-estimate 3D
# velocity difference and the ln_H1/ln_H2 values computed without and with
# the radial velocities.

# Restrict to observations that have an RV measurement with a non-null RV.
# `!= None` is deliberate: this is a SQLAlchemy column expression, not a
# Python identity test.
base_q = session.query(Observation).join(RVMeasurement).filter(RVMeasurement.rv != None)  # noqa: E711

# Speed of light in km/s, converted once rather than on every loop iteration.
c_kms = c.to(u.km/u.s)

names = ['group_id', 'dv', 'lnH1_no_rv', 'lnH2_no_rv', 'lnH1_rv', 'lnH2_rv']
rows = []
for gid in np.unique(group_ids):
    # np.unique yields numpy integer scalars. Use a builtin int for the SQL
    # bind parameter, the dict lookup and the stored row so the query does not
    # depend on how the database driver handles numpy scalar types.
    gid = int(gid)
    observations = base_q.filter(Observation.group_id == gid).all()

    # Groups need exactly two usable observations, unless a pair was
    # hand-picked for them in multi_obs.
    if len(observations) != 2 and gid not in multi_obs:
        print("skipping group {0} ({1})".format(gid, len(observations)))
        continue

    if gid in multi_obs:
        # NOTE(review): these indices refer to positions in the query result
        # and the query has no ORDER BY, so the selection relies on the
        # database returning rows in a stable order.
        obs1 = observations[multi_obs[gid][0]]
        obs2 = observations[multi_obs[gid][1]]
    else:
        obs1, obs2 = observations

    # Velocity difference implied by the difference of the first measurement's
    # x0 for each observation. The 6563. divisor is presumably the H-alpha
    # rest wavelength in Angstrom -- TODO confirm.
    raw_rv_diff = (obs1.measurements[0].x0 - obs2.measurements[0].x0) / 6563. * c_kms
    mean_rv = np.mean([obs1.rv_measurement.rv.value,
                       obs2.rv_measurement.rv.value]) * obs2.rv_measurement.rv.unit

    # Re-centre the pair on its mean RV while keeping the raw difference:
    # rv1 - rv2 == raw_rv_diff by construction.
    rv1 = mean_rv + raw_rv_diff/2.
    rv_err1 = obs1.measurements[0].x0_error / 6563. * c_kms
    rv2 = mean_rv - raw_rv_diff/2.
    rv_err2 = obs2.measurements[0].x0_error / 6563. * c_kms

    # Compute point-estimate difference in 3D velocity
    icrs1 = obs1.icrs(with_rv=rv1)
    icrs2 = obs2.icrs(with_rv=rv2)

    icrs1.set_representation_cls(coord.CartesianRepresentation, coord.CartesianDifferential)
    icrs2.set_representation_cls(coord.CartesianRepresentation, coord.CartesianDifferential)

    dv = np.sqrt((icrs1.v_x-icrs2.v_x)**2 +
                 (icrs1.v_y-icrs2.v_y)**2 +
                 (icrs1.v_z-icrs2.v_z)**2)

    # First evaluation: stars built with tgas_star(False) (presumably without
    # RV information, matching the *_no_rv column names -- TODO confirm).
    ln_H1_no_rv, ln_H2_no_rv = worker.work(obs1.tgas_star(False), obs2.tgas_star(False), v_scatter)

    # Second evaluation: override the stars' RV and RV error with the
    # re-centred values computed above.
    star1 = obs1.tgas_star()
    star2 = obs2.tgas_star()
    star1._rv = rv1.value
    star2._rv = rv2.value
    star1._rv_err = rv_err1.value
    star2._rv_err = rv_err2.value
    ln_H1, ln_H2 = worker.work(star1, star2, v_scatter)

    rows.append((gid, dv.value, ln_H1_no_rv, ln_H2_no_rv, ln_H1, ln_H2))

# Structured dtype: 32-bit integer group id followed by float64 columns.
dtype = dict(names=names, formats=['i4']+['f8']*(len(names)-1))
tbl = np.array(rows, dtype)
tbl = Table(tbl)
tbl['dv'].unit = u.km/u.s

In [None]:
# Report how many rows (one per retained pair) ended up in the table.
print('{0} pairs remain'.format(len(tbl)))

In [None]:
# Differences lnH1 - lnH2 stored as new columns: R_mu from the values computed
# without RVs, R_RV from the values computed with RVs.
tbl['R_mu'] = tbl['lnH1_no_rv'] - tbl['lnH2_no_rv']
tbl['R_RV'] = tbl['lnH1_rv'] - tbl['lnH2_rv']

In [None]:
# R_RV and R_mu are the lnH1 - lnH2 differences computed with and without the
# radial velocities, respectively.
llr_rv = tbl['R_RV']
llr_no_rv = tbl['R_mu']

# Change in the difference once the RVs are included.
dllr = llr_rv - llr_no_rv

# Count the pairs whose with-RV value exceeds the no-RV value.
print(
    '{0} pairs are genuinely comoving'.format(
        (llr_rv > llr_no_rv).sum()
    )
)

In [None]:
# For every pair already in tbl, sample the 3D velocity difference and record
# its 15th, 50th and 85th percentiles.

# Restrict to observations that have an RV measurement with a non-null RV.
# `!= None` is deliberate: this is a SQLAlchemy column expression, not a
# Python identity test.
base_q = session.query(Observation).join(RVMeasurement).filter(RVMeasurement.rv != None)  # noqa: E711

# Speed of light in km/s, converted once rather than on every loop iteration.
c_kms = c.to(u.km/u.s)

dv_15s = []
dv_meds = []
dv_85s = []
for gid in tbl['group_id']:
    # Table columns yield numpy integer scalars. Use a builtin int for the SQL
    # bind parameter and the dict lookup so the query does not depend on how
    # the database driver handles numpy scalar types.
    gid = int(gid)
    observations = base_q.filter(Observation.group_id == gid).all()

    # NOTE(review): every gid in tbl passed this same check when the table was
    # built, so this branch is not expected to trigger. If it did, the lists
    # below would end up shorter than tbl and no longer line up with its rows.
    if len(observations) != 2 and gid not in multi_obs:
        print("skipping group {0} ({1})".format(gid, len(observations)))
        continue

    if gid in multi_obs:
        # NOTE(review): positional indices into an unordered query result;
        # relies on the database returning rows in a stable order.
        obs1 = observations[multi_obs[gid][0]]
        obs2 = observations[multi_obs[gid][1]]
    else:
        obs1, obs2 = observations

    # Velocity difference implied by the difference of the first measurement's
    # x0 for each observation. The 6563. divisor is presumably the H-alpha
    # rest wavelength in Angstrom -- TODO confirm.
    raw_rv_diff = (obs1.measurements[0].x0 - obs2.measurements[0].x0) / 6563. * c_kms
    mean_rv = np.mean([obs1.rv_measurement.rv.value,
                       obs2.rv_measurement.rv.value]) * obs2.rv_measurement.rv.unit

    # Re-centre the pair on its mean RV while keeping the raw difference.
    rv1 = mean_rv + raw_rv_diff/2.
    rv_err1 = obs1.measurements[0].x0_error / 6563. * c_kms
    rv2 = mean_rv - raw_rv_diff/2.
    rv_err2 = obs2.measurements[0].x0_error / 6563. * c_kms

    # Unlike a point estimate, this requests 2**16 samples per star via
    # icrs_samples, passing the re-centred RV and its error as custom_rv.
    icrs1 = obs1.icrs_samples(size=2**16, custom_rv=(rv1,rv_err1))
    icrs2 = obs2.icrs_samples(size=2**16, custom_rv=(rv2,rv_err2))

    icrs1.set_representation_cls(coord.CartesianRepresentation, coord.CartesianDifferential)
    icrs2.set_representation_cls(coord.CartesianRepresentation, coord.CartesianDifferential)

    # Magnitude of the velocity difference between the two stars.
    dv = np.sqrt((icrs1.v_x-icrs2.v_x)**2 +
                 (icrs1.v_y-icrs2.v_y)**2 +
                 (icrs1.v_z-icrs2.v_z)**2)

    # Summarise the dv values by their 15th, 50th and 85th percentiles.
    dv_15, dv_med, dv_85 = scoreatpercentile(dv.value, [15, 50, 85])
    dv_15s.append(dv_15)
    dv_meds.append(dv_med)
    dv_85s.append(dv_85)

In [None]:
# Store the per-pair dv percentiles (15th, 50th, 85th) as columns in km/s.
tbl['dv_15'] = u.Quantity(dv_15s, u.km/u.s)
tbl['dv_50'] = u.Quantity(dv_meds, u.km/u.s)
tbl['dv_85'] = u.Quantity(dv_85s, u.km/u.s)

In [None]:
# For every pair already in tbl, compute the separation and the 3D separation
# between the two stars.

# Restrict to observations that have an RV measurement with a non-null RV.
# `!= None` is deliberate: this is a SQLAlchemy column expression, not a
# Python identity test.
base_q = session.query(Observation).join(RVMeasurement).filter(RVMeasurement.rv != None)  # noqa: E711

sep_2d = []
sep_3d = []
for gid in tbl['group_id']:
    # Table columns yield numpy integer scalars. Use a builtin int for the SQL
    # bind parameter and the dict lookup so the query does not depend on how
    # the database driver handles numpy scalar types.
    gid = int(gid)
    observations = base_q.filter(Observation.group_id == gid).all()

    # NOTE(review): every gid in tbl passed this same check when the table was
    # built, so this branch is not expected to trigger. If it did, the lists
    # below would end up shorter than tbl and no longer line up with its rows.
    if len(observations) != 2 and gid not in multi_obs:
        print("skipping group {0} ({1})".format(gid, len(observations)))
        continue

    if gid in multi_obs:
        # NOTE(review): positional indices into an unordered query result;
        # relies on the database returning rows in a stable order.
        obs1 = observations[multi_obs[gid][0]]
        obs2 = observations[multi_obs[gid][1]]
    else:
        obs1, obs2 = observations

    # Coordinates requested with lutz_kelker=True (presumably a Lutz-Kelker
    # correction of the distance -- TODO confirm in Observation.icrs).
    icrs1 = obs1.icrs(lutz_kelker=True)
    icrs2 = obs2.icrs(lutz_kelker=True)

    sep_2d.append(icrs1.separation(icrs2))
    sep_3d.append(icrs1.separation_3d(icrs2))

In [None]:
# Combine the per-pair separations into single Quantity columns. The lists
# were filled while iterating tbl['group_id'], so entries line up with the
# table rows as long as no group was skipped in that loop.
tbl['sep_2d'] = u.Quantity(sep_2d)
tbl['sep_3d'] = u.Quantity(sep_3d)

In [None]:
# Write the finished table to disk in ECSV format.
# NOTE(review): no overwrite flag is passed; depending on the astropy version,
# re-running the notebook with the file already present may warn or raise --
# confirm and consider overwrite=True.
tbl.write('../group_llr_dv_tbl.ecsv', format='ascii.ecsv')