This notebook has been abstracted from work done elsewhere to provide a clean workflow for producing a summary table. This notebook is particularly useful for querying the SIMBAD database for specific data of interest for a list of Kepler objects.

In [5]:
import pickle

In [8]:
# Load the pickled score table from scores/Average and index it by its 'KIC' column.
# NOTE: unpickling can execute arbitrary code; only load score files you trust.
with open('scores/Average', 'rb') as score_file:
    loaded_scores = pickle.load(score_file)
    avscores = loaded_scores.set_index('KIC')


In [1]:
from astroquery.simbad import Simbad

In [2]:
# Configure which VOTable fields SIMBAD returns: start from the library defaults,
# drop 'main_id' and 'coordinates', and request 'typed_id', 'otype', 'coo', 'biblio'.
Simbad.reset_votable_fields()
Simbad.remove_votable_fields('main_id')
# NOTE(review): 'typed_id' is added before 'coordinates' is removed, presumably so the
# field list is never empty between calls -- confirm before reordering these lines.
Simbad.add_votable_fields('typed_id')
Simbad.remove_votable_fields('coordinates')
Simbad.add_votable_fields('otype')
Simbad.add_votable_fields('coo')
Simbad.add_votable_fields('biblio')

In [3]:
from astropy.table import Table,vstack

In [9]:
# Split the positional row numbers of avscores into consecutive query chunks.
# CHUNK_SIZE is the number of objects sent to SIMBAD per request.
CHUNK_SIZE = 1000
n_objects = len(avscores)

# Number of completely filled chunks (integer division avoids a float round-trip).
nchunks = n_objects // CHUNK_SIZE
chunks = [range(i * CHUNK_SIZE, (i + 1) * CHUNK_SIZE) for i in range(nchunks)]

# Remainder chunk; only appended when non-empty so that no empty query is issued
# when the number of objects is an exact multiple of CHUNK_SIZE.
lastchunk = range(nchunks * CHUNK_SIZE, n_objects)
if len(lastchunk) > 0:
    chunks.append(lastchunk)

Output for the following block is suppressed; nonfatal errors are produced for every object that isn't in SIMBAD. In addition, the connection is occasionally reset, in which case the query is retried until the connection is reestablished. The chunk size should prevent this from overwhelming the server, and it *shouldn't* result in being temporarily blacklisted for submitting too many queries at once... Use at your own risk.

In [None]:
def simbadquery(avscores, chunk, result_table, max_retries=None):
    """Query SIMBAD for one chunk of KIC objects and append the rows to a table.

    Parameters
    ----------
    avscores : object exposing ``.index``
        Its index holds the KIC numbers; ``avscores.index[chunk]`` selects the
        objects to query.
    chunk : positional indexer (e.g. a ``range``)
        Positions within ``avscores.index`` that make up this chunk.
    result_table : table
        Rows accumulated so far; it is not modified in place.
    max_retries : int or None, optional
        Maximum number of retries after a ``ConnectionError``. ``None`` (the
        default) retries until the query succeeds.

    Returns
    -------
    table
        ``result_table`` stacked with the rows returned for this chunk.

    Raises
    ------
    ConnectionError
        Only when ``max_retries`` is set and has been exhausted.
    """
    object_names = ['KIC{}'.format(i) for i in avscores.index[chunk]]

    # Retry in a loop rather than recursively: a recursive retry whose return value
    # is discarded drops the chunk, and unbounded recursion can exhaust the stack.
    attempts = 0
    while True:
        try:
            chunktable = Simbad.query_objects(object_names)
            break
        except ConnectionError:
            # NOTE(review): this is the builtin ConnectionError; confirm it matches
            # the exception type the HTTP layer actually raises on a reset.
            attempts += 1
            if max_retries is not None and attempts > max_retries:
                raise

    # Drop the coordinate-error columns before stacking.
    chunktable.remove_columns(['COO_ERR_MAJA','COO_ERR_MINA','COO_ERR_ANGLE'])
    return vstack([result_table, chunktable])

# Seed the result with the first chunk, then append every remaining chunk in order.
# All chunks must be queried: a later cell assigns avscores.index to the rows of
# result_table, which requires one row per object.
result_table = Simbad.query_objects(['KIC{}'.format(i) for i in avscores.index[chunks[0]]])
# Drop the same coordinate-error columns that simbadquery() removes from later chunks,
# so the stacked table has a consistent set of columns.
result_table.remove_columns(['COO_ERR_MAJA','COO_ERR_MINA','COO_ERR_ANGLE'])
for chunk in chunks[1:]:
    result_table = simbadquery(avscores,chunk,result_table)

        

In [112]:
# Convert the stacked SIMBAD table to pandas, label its rows with avscores' KIC index
# (this relies on the rows being in the same order as avscores), and drop 'TYPED_ID'.
simbad_summary_df = (
    result_table
    .to_pandas()
    .set_index(avscores.index)
    .drop(columns='TYPED_ID')
)

In [151]:
def _bytes_to_str(value):
    """Decode ``bytes`` to ``str``; return any other value (str, NaN, ...) unchanged."""
    return value.decode() if isinstance(value, bytes) else value

# Decode the byte-string columns to text. Passing non-bytes values through keeps the
# cell safe to re-run and tolerant of values that are already str or missing.
for column in ('OTYPE', 'BIBLIO'):
    simbad_summary_df[column] = simbad_summary_df.loc[:, column].apply(_bytes_to_str)

In [152]:
# Display the SIMBAD summary frame (the rendered output shows only the first and last rows).
simbad_summary_df

Unnamed: 0_level_0,OTYPE,RA,DEC,BIBLIO
KIC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
757076,Star,19 24 09.2898,+36 35 53.121,2017ApJS..229...30M
757099,Cepheid,19 24 10.3300,+36 35 37.602,2019MNRAS.484..834G|2016ApJ...829...23D|2014Ap...
757137,Eruptive*,19 24 13.4198,+36 33 35.724,2018ApJS..236...42Y|2016ApJ...829...23D
757280,,,,
757450,RotV*,19 24 33.0185,+36 34 38.477,2019AJ....158...59S|2019MNRAS.482.1379H|2019MN...
...,...,...,...,...
100004295,,,,
100004297,,,,
100004298,,,,
100004299,,,,


In [13]:
# pandas is imported here because, read top to bottom, this cell runs before the
# notebook's later `import pandas as pd` cell and would otherwise raise NameError.
import pandas as pd

# Empty summary frame sharing avscores' index; later cells add its columns.
summary_df = pd.DataFrame(index=avscores.index)

In [14]:
# Rank the objects within each score column (rank 1 = highest score), then take
# each object's median rank across the columns.
summary_df['median_rank'] = (
    avscores
    .rank(ascending=False)
    .median(axis=1)
)

In [15]:
# Rank the objects within each score column (rank 1 = highest score), then keep
# each object's smallest rank across the columns.
summary_df['most_outlying_rank'] = (
    avscores
    .rank(ascending=False)
    .min(axis=1)
)

In [1]:
from multiprocessing import Pool,cpu_count
import pickle
import pandas as pd

In [2]:
# Reload the pickled score table and index it by 'KIC'. This repeats the load done
# at the start of the notebook.
# NOTE: unpickling can execute arbitrary code; only load score files you trust.
with open('scores/Average', 'rb') as score_file:
    loaded_scores = pickle.load(score_file)
    avscores = loaded_scores.set_index('KIC')

In [3]:
# Pool.map passes a single argument to the worker function, so the score table is
# shared with minq() through module-level globals instead of a parameter.
av_scores = avscores

# Rank objects within each column once, up front (rank 1 = highest score).
# Re-ranking the full table inside minq() for every KIC is quadratic overall.
# This cache must be rebuilt if av_scores is reassigned.
_av_ranks = av_scores.rank(ascending=False)

def minq(kic):
    """Return the column label at which ``kic`` attains its smallest rank.

    Parameters
    ----------
    kic : index label
        A label present in ``av_scores.index``.

    Returns
    -------
    label
        The first column of ``av_scores`` whose rank for ``kic`` equals that
        object's minimum rank across columns.
    """
    kic_ranks = _av_ranks.loc[kic]
    at_minimum = kic_ranks == kic_ranks.min()
    # Ties are resolved by column order: the first matching column wins.
    min_q = kic_ranks.index[at_minimum][0]
    return min_q

In [6]:
# Evaluate minq() for every KIC in parallel, one worker process per CPU.
# Pool.map blocks until all results are ready and returns them in input order.
useCpus = cpu_count()
# The context manager shuts the pool down even if map() raises, so worker
# processes are not leaked after an error.
with Pool(useCpus) as p:
    min_q = p.map(minq,av_scores.index)

In [16]:
# min_q holds one entry per label of av_scores.index, in that order, so it is
# assigned positionally to the rows of summary_df.
summary_df['most_outlying_quarter']=min_q

In [14]:
# Row count of the stacked SIMBAD result table.
len(result_table)

201266

In [154]:
# Place the rank summary and the SIMBAD columns side by side, aligned on the index.
full_summary_df = pd.concat(
    [summary_df, simbad_summary_df],
    axis=1,
)

In [156]:
# Preview the first rows of the combined summary.
full_summary_df.head()

Unnamed: 0_level_0,median_rank,most_outlying_rank,most_outlying_quarter,OTYPE,RA,DEC,BIBLIO
KIC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
757076,38099.0,8427.0,Q14,Star,19 24 09.2898,+36 35 53.121,2017ApJS..229...30M
757099,2530.0,1563.0,Q2,Cepheid,19 24 10.3300,+36 35 37.602,2019MNRAS.484..834G|2016ApJ...829...23D|2014Ap...
757137,36285.0,13696.0,Q11,Eruptive*,19 24 13.4198,+36 33 35.724,2018ApJS..236...42Y|2016ApJ...829...23D
757280,39413.0,8991.0,Q10,,,,
757450,4610.0,2650.0,Q7,RotV*,19 24 33.0185,+36 34 38.477,2019AJ....158...59S|2019MNRAS.482.1379H|2019MN...


In [155]:
# Write the combined summary (index included) to KIC_Summary.csv in the working directory.
full_summary_df.to_csv('KIC_Summary.csv')