In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils_xmatch import wsi_gaia_xmatch
warnings.simplefilter(action='ignore', category=FutureWarning)

Load in wsi and filter catalog

In [2]:
# Columns that must be numeric for the rest of the notebook; each is cast to float.
float_columns = [
    'wsi_sep', 'wsi_sep_e',
    'wsi_pa', 'wsi_pa_e',
    'wsi_dm', 'wsi_dm_e',
    'wds_mag1', 'wds_mag2',
    'wds_pm1', 'wds_pm2',
]

# Load the WSI catalog and force the dtypes listed above.
wsi_raw = pd.read_csv('data/wsi24.sb.prop.csv')
wsi = wsi_raw.astype(dict.fromkeys(float_columns, float))
print(len(wsi))

# Final sample: currently every row is kept and only the index is rebuilt.
# An optional separation cut (wsi.loc[wsi.wsi_sep > 0.8]) is intentionally disabled.
sample = wsi.reset_index(drop=True)
len(sample)

2868


2868

In [3]:
# Write the sample to disk for the Gaia query; the row index is saved as the
# 'wsi_oid' column so each row keeps an identifier.
sample.to_csv(
    'data/wsi.query_sample.csv',
    index_label='wsi_oid',
    index=True,
)

# Current setup
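* 5 arcsec search radius
* queries Gaia stars within 5 mag of the match
* we propagate ALL secondaries
* NOTE (to confirm): the result files loaded below are named `pri4mag` and `sec4mag1arcmin`; check that the radius and magnitude limits listed here match the query that produced them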

In [4]:
# Read the WSI query sample back from disk together with the two Gaia result tables.
wsi = pd.read_csv('data/wsi.query_sample.csv')
gaia_pri = pd.read_csv('data/gaia.results/pri4mag-result.csv')
gaia_sec = pd.read_csv('data/gaia.results/sec4mag1arcmin-result.csv')

# Row counts, in order: WSI sample, primary results, secondary results.
for table in (wsi, gaia_pri, gaia_sec):
    print(len(table))

2868
4116
7569


In [5]:
# Crossmatch the WSI sample against the Gaia primary/secondary query results.
# NOTE(review): the J2016 epoch propagation is presumably done inside
# wsi_gaia_xmatch (utils_xmatch) -- confirm there.
xmatch = wsi_gaia_xmatch(wsi, gaia_pri, gaia_sec)

# Where did our crossmatch agree with SIMBAD?
# The element-wise comparison already yields a boolean column, so wrapping it
# in np.where(..., True, False) is redundant.
xmatch['xm_chk1'] = xmatch['gaia_id1'] == xmatch['sb_id1']
xmatch['xm_chk2'] = xmatch['gaia_id2'] == xmatch['sb_id2']

How many stars did we not find a match for?

In [6]:
# Count the rows whose Gaia flag is '$' for the primary and for the secondary.
print('no match candidates found in query')
print('primary:', (xmatch['gaia_flag1'] == '$').sum())
print('secondary:', (xmatch['gaia_flag2'] == '$').sum())

no match candidates found in query
primary: 0
secondary: 326


For the secondaries with no proper motion, how many matches are missing?

In [7]:
# Boolean masks: rows whose epoch propagation flag is '!' and rows whose
# secondary Gaia flag is '$'.
pm_flagged = xmatch['epoch_prop_flag'] == '!'
sec_unmatched = xmatch['gaia_flag2'] == '$'

# Split the '$' secondaries by whether the propagation flag is set.
print('With PM:', (sec_unmatched & ~pm_flagged).sum())
print('Without PM:', (sec_unmatched & pm_flagged).sum())

With PM: 195
Without PM: 131


## SIMBAD Matches

We want to see a few data points:

1. Of the stars for which SIMBAD found a direct match (flag `.`, i.e. neither `!` nor `$`), in what percentage of cases do our crossmatches agree?
2. Where does simbad have and not have matches? Specific separation? dm?
3. Of our matches that agree and disagree with simbad, do we see any trends in sep? dm?

In [8]:
# SIMBAD flag codes, in the order used for the subsets below:
# '.' (reported as "direct"), '!' ("flagged"), '$' ("no match").
flag_codes = ('.', '!', '$')

# Subsets of the crossmatch by the SIMBAD flag of the primary ...
pri_flag = xmatch['sb_flg1']
sb1, sbf1, sbn1 = (xmatch[pri_flag == code] for code in flag_codes)

# ... and by the SIMBAD flag of the secondary.
sec_flag = xmatch['sb_flg2']
sb2, sbf2, sbn2 = (xmatch[sec_flag == code] for code in flag_codes)

# One summary line per flag code: primary count, then secondary count.
summary_rows = (
    ('direct |', sb1, sb2),
    ('flagged |', sbf1, sbf2),
    ('no match |', sbn1, sbn2),
)
for label, pri_subset, sec_subset in summary_rows:
    print(label, 'pri:', len(pri_subset), 'sec:', len(sec_subset))

direct | pri: 2079 sec: 1636
flagged | pri: 19 sec: 10
no match | pri: 770 sec: 1222


In [9]:
# Agreement between our primary match and SIMBAD, restricted to the '.' subset:
# absolute counts first, then the same counts as fractions.
pri_agreement = sb1['xm_chk1']
print(pri_agreement.value_counts())
print(pri_agreement.value_counts(normalize=True))

xm_chk1
True     2032
False      47
Name: count, dtype: int64
xm_chk1
True     0.977393
False    0.022607
Name: proportion, dtype: float64


In [10]:
# Agreement between our secondary match and SIMBAD, restricted to the '.' subset:
# absolute counts first, then the same counts as fractions.
sec_agreement = sb2['xm_chk2']
print(sec_agreement.value_counts())
print(sec_agreement.value_counts(normalize=True))

xm_chk2
True     1557
False      79
Name: count, dtype: int64
xm_chk2
True     0.951711
False    0.048289
Name: proportion, dtype: float64


In [11]:
# Save the crossmatch table, including the xm_chk agreement columns, without the index.
xmatch.to_csv(
    path_or_buf='data/wsi24.xmatch.csv',
    index=False,
)