In [1]:
import pandas as pd

In [31]:
# directory holding the Kraus2016 table files; the trailing slash matters
# because file names are appended to this string with `+`
k16_path = './data/literature_data/Kraus2016/'

def load_kraus_table(path):
    """Read one whitespace-delimited companion table and keep its separation columns.

    Parameters
    ----------
    path : str
        Location of the table file. The file must provide at least the
        columns ``KOI``, ``sep_mas`` and ``sep_err``.

    Returns
    -------
    pandas.DataFrame
        The ``KOI``, ``sep_mas`` and ``sep_err`` columns, with the
        ``-A-C``, ``-A-B`` and ``-B-C`` markers stripped from ``KOI``.
    """
    # sep=r'\s+' is the supported equivalent of the deprecated
    # delim_whitespace=True keyword.
    table = pd.read_csv(path, sep=r'\s+')

    koi = table['KOI']
    for pair_marker in ('-A-C', '-A-B', '-B-C'):
        # regex=False: the markers are literal text, so do not depend on the
        # pandas-version-specific default of the `regex` argument.
        koi = koi.str.replace(pair_marker, '', regex=False)
    table['KOI'] = koi

    return table[['KOI', 'sep_mas', 'sep_err']]

# combine companions identified from different methods
kraus_table_files = [
    'Kraus2016_Table3.csv',  # NRM
    'Kraus2016_Table5.csv',  # aperture photometry
    'Kraus2016_Table6.csv',  # multi-PSF fitting
]
companions_all = pd.concat(
    [load_kraus_table(k16_path + file_name) for file_name in kraus_table_files])

# query targets where any separation within reported uncertainties
# falls within hires slit width of 0.8arcsec
hires_slit_width = 800  # 0.8arcsec = 800mas

# Build the final frame in one chain: .assign() returns a new DataFrame, so
# adding the column no longer writes into the filtered result of .query()
# (which raised SettingWithCopyWarning).
table = (
    companions_all
    .query('sep_mas - sep_err < @hires_slit_width')
    # add column with starnames to match training set table
    .assign(id_starname=lambda kept: kept['KOI'].str.replace('KOI-', 'K0', regex=False))
)

In [32]:
# show the companions that passed the slit-width cut
table

Unnamed: 0,KOI,sep_mas,sep_err,id_starname
0,KOI-0005,28.548,0.590,K00005
1,KOI-0214,70.938,1.596,K00214
2,KOI-0289,16.940,0.985,K00289
3,KOI-0291,66.170,0.335,K00291
4,KOI-0854,16.089,0.980,K00854
...,...,...,...,...
54,KOI-1613,211.760,1.570,K01613
55,KOI-1835,53.570,1.520,K01835
56,KOI-1977,84.890,1.500,K01977
57,KOI-2418,106.120,1.590,K02418


In [33]:
# TODO: the stars flagged above still have to be removed from the training
# set, and that step integrated into the code. First load the label
# dataframe to see how many of them it contains.
training_labels_csv = './data/label_dataframes/training_labels.csv'
training_labels = pd.read_csv(training_labels_csv)
training_labels

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id_starname,id_kic,id_koicand,id_kepler_name,koi_disposition,koi_period,koi_period_err1,...,iso_sma,iso_sma_err1,iso_sma_err2,iso_insol,iso_insol_err1,iso_insol_err2,iso_teq,iso_teq_err1,iso_teq_err2,spectrum_fileroot
0,0,0,0,K00001,11446443,K00001.01,Kepler-1 b,CONFIRMED,2.470613,1.900000e-08,...,0.035951,0.000596,-0.000596,890.712853,184.876216,-184.876216,1392.188224,71.758330,-71.758330,122.7420
1,1,1,1,K00002,10666592,K00002.01,Kepler-2 b,CONFIRMED,2.204735,3.800000e-08,...,0.036880,0.000727,-0.000727,3029.593093,931.186264,-931.186264,1890.643307,146.140168,-146.140168,122.9200
2,2,2,2,K00003,10748390,K00003.01,Kepler-3 b,CONFIRMED,4.887803,1.770000e-07,...,0.052952,0.000883,-0.000883,116.907786,20.094411,-20.094411,837.962116,35.981752,-35.981752,122.8100
3,3,3,3,K00006,3248033,K00006.01,,FALSE POSITIVE,1.334104,7.070000e-07,...,0.025383,0.000427,-0.000427,3595.445148,694.155894,-694.155894,1973.338972,95.179897,-95.179897,70.1247
4,4,4,4,K00007,11853905,K00007.01,Kepler-4 b,CONFIRMED,3.213669,1.122000e-06,...,0.044247,0.001075,-0.001075,1233.844672,367.335736,-367.335736,1510.353647,112.889880,-112.889880,74.5090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,1212,1212,1958,K04881,8094120,K04881.01,,NOT DISPOSITIONED,5.679295,8.465000e-05,...,0.066177,0.001101,-0.001101,572.848502,144.283788,-144.283788,1246.733048,78.028181,-78.028181,179.4980
885,1217,1217,1963,K04907,5437762,K04907.01,,NOT DISPOSITIONED,4.044849,8.795000e-05,...,0.051001,0.000856,-0.000856,746.872395,191.465164,-191.465164,1332.217481,86.174159,-86.174159,179.8840
886,1236,1236,1982,K05236,6067545,K05236.01,,CANDIDATE,550.859839,8.210000e-03,...,1.329101,0.022574,-0.022574,0.742273,0.125419,-0.125419,236.539941,9.970574,-9.970574,211.7900
887,1272,1272,2018,K05900,11453930,K05900.01,,FALSE POSITIVE,355.847310,2.086000e-02,...,1.041967,0.017506,-0.017506,1.972225,0.329911,-0.329911,301.996993,12.558650,-12.558650,189.2508


In [34]:
# How many of the flagged stars are in the training labels?
# Result: 35 training set stars have known binaries.
# They should be removed before training, but they will be interesting to
# look at! I'm going to organize my notes before implementing this.
#
# Vectorized membership test: avoids rebuilding the companion-name array for
# every training row. int() keeps the displayed result a plain Python integer.
int(training_labels['id_starname'].isin(table['id_starname']).sum())

35

In [35]:
import numpy as np
# cross-check the count: join the companions onto the labels by star name
# and count the distinct stars that survive the join
df = table.merge(training_labels, on='id_starname')
len(np.unique(df['id_starname']))

35

In [36]:
# Snippet to add to the code: keep only the training rows whose star
# does not appear in the companion table.
has_known_companion = training_labels['id_starname'].isin(table['id_starname'])
training_labels[~has_known_companion]

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id_starname,id_kic,id_koicand,id_kepler_name,koi_disposition,koi_period,koi_period_err1,...,iso_sma,iso_sma_err1,iso_sma_err2,iso_insol,iso_insol_err1,iso_insol_err2,iso_teq,iso_teq_err1,iso_teq_err2,spectrum_fileroot
0,0,0,0,K00001,11446443,K00001.01,Kepler-1 b,CONFIRMED,2.470613,1.900000e-08,...,0.035951,0.000596,-0.000596,890.712853,184.876216,-184.876216,1392.188224,71.758330,-71.758330,122.7420
1,1,1,1,K00002,10666592,K00002.01,Kepler-2 b,CONFIRMED,2.204735,3.800000e-08,...,0.036880,0.000727,-0.000727,3029.593093,931.186264,-931.186264,1890.643307,146.140168,-146.140168,122.9200
2,2,2,2,K00003,10748390,K00003.01,Kepler-3 b,CONFIRMED,4.887803,1.770000e-07,...,0.052952,0.000883,-0.000883,116.907786,20.094411,-20.094411,837.962116,35.981752,-35.981752,122.8100
3,3,3,3,K00006,3248033,K00006.01,,FALSE POSITIVE,1.334104,7.070000e-07,...,0.025383,0.000427,-0.000427,3595.445148,694.155894,-694.155894,1973.338972,95.179897,-95.179897,70.1247
4,4,4,4,K00007,11853905,K00007.01,Kepler-4 b,CONFIRMED,3.213669,1.122000e-06,...,0.044247,0.001075,-0.001075,1233.844672,367.335736,-367.335736,1510.353647,112.889880,-112.889880,74.5090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,1212,1212,1958,K04881,8094120,K04881.01,,NOT DISPOSITIONED,5.679295,8.465000e-05,...,0.066177,0.001101,-0.001101,572.848502,144.283788,-144.283788,1246.733048,78.028181,-78.028181,179.4980
885,1217,1217,1963,K04907,5437762,K04907.01,,NOT DISPOSITIONED,4.044849,8.795000e-05,...,0.051001,0.000856,-0.000856,746.872395,191.465164,-191.465164,1332.217481,86.174159,-86.174159,179.8840
886,1236,1236,1982,K05236,6067545,K05236.01,,CANDIDATE,550.859839,8.210000e-03,...,1.329101,0.022574,-0.022574,0.742273,0.125419,-0.125419,236.539941,9.970574,-9.970574,211.7900
887,1272,1272,2018,K05900,11453930,K05900.01,,FALSE POSITIVE,355.847310,2.086000e-02,...,1.041967,0.017506,-0.017506,1.972225,0.329911,-0.329911,301.996993,12.558650,-12.558650,189.2508


In [23]:
# NOTE(review): presumably (rows kept after the cut) + (35 removed stars);
# TODO confirm the total against len(training_labels)
854+35

889