In [7]:
import numpy as np
import pandas as pd
import sys

sys.path.append('python')
from clusterOutliers import clusterOutliers as coo

In [5]:
def make_sampler(inds=['8462852']): 
    """
    Args:
        inds (Array of strings) - array of indices, as identifying strings, to be pulled from a data frame, 
                                  can be with or without kplr prefix
    Returns:
        Function that will pull the data, indicated by inds, from a dataframe df 
        
    Useful to generate samples across quarters with common sources where data is contained as a 
    Pandas dataframe, with indices set to be identifying labels (i.e. kplr008462852)
    
    To use:
        Define array containing IDs of sources of interest as strings
        Define a sample generator by calling make_sampler(inds=Array of string IDs)
        Generate dataframe by calling new function.

    Example:
    tabby_sample = make_sampler(inds=['8462852'])
    Q4_sample = tabby_sample(Q4.data)
    Q8_sample = tabby_sample(Q8.data)
    etc.
    """
    return lambda df: df[df.index.str.contains('|'.join(inds))]

def import_generator(suffix='_FullSample.csv',
                     filepath="/home/dgiles/Documents/KeplerLCs/output/",
                    fits_files_directory="/home/dgiles/Documents/KeplerLCs/fitsFiles/"):
    """
    Args:
        suffix (str) - the suffix of the file to be imported
        filepath (optional, str) - filepath to the file to be imported
        fits_files_directory (optional, str) - path to the directory containing the fits files
        
    Returns:
        lambda function that with single str input of the prefix, typically a Q#.
    
    Use:
        Enables simpler import of multiple quarters of data contained 
        in the same location with the same suffixes.
    
    Requirements: 
    import sys
    sys.path.append('python')
    from clusterOutliers import clusterOutliers as coo
    """
    return lambda QN: coo(filepath+QN+suffix,fits_files_directory+QN+"fitsfiles")

In [8]:
import_quarter = import_generator(suffix="_FullSample.csv")
#qs = ['Q4','Q8','Q11','Q16']
#PCA_folder = "/home/dgiles/Documents/KeplerLCs/output/PCA_reductions/"
#paper_qs = dict(zip(qs,[import_quarter(Q) for Q in qs]))
Q4p = import_quarter('Q4')

In [46]:
ebs_list = list(np.genfromtxt('list_EBs.csv',delimiter=',',dtype=str)[:,0])

In [47]:
list(ebs_list)[:10]

['3863594',
 '10417986',
 '8912468',
 '8758716',
 '10855535',
 '9472174',
 '9612468',
 '6613627',
 '5302006',
 '9898401']

In [49]:
eb_sampler = make_sampler(ebs_list)
Q4_ebs = eb_sampler(Q4p.data)

In [57]:
print("""
Of {} eclipsing binaries identified in the kepler field, {} ({:05.2f}%) are found to be outliers.
""".format(len(Q4_ebs),len(Q4_ebs[Q4_ebs.db_out==-1]),100*len(Q4_ebs[Q4_ebs.db_out==-1])/len(Q4_ebs)))


Of 2187 eclipsing binaries identified in the kepler field, 1673 (76.50%) are found to be outliers.



In [None]:
koi_list = list(np.genfromtxt('list_koi_full.txt',delimiter=',',dtype=str)[:,0])
koi_sampler = make_sampler(koi_list)
Q4_koi = koi_sampler(Q4p.data)
print("""
Of {} KOIs, {} ({:05.2f}%) are found to be outliers.
""".format(len(Q4_koi),len(Q4_koi[Q4_koi.db_out==-1]),100*len(Q4_koi[Q4_koi.db_out==-1])/len(Q4_koi)))

In [59]:
print("""
Of {} KOIs, {} ({:05.2f}%) are found to be outliers.
""".format(len(Q4_koi),len(Q4_koi[Q4_koi.db_out==-1]),100*len(Q4_koi[Q4_koi.db_out==-1])/len(Q4_koi)))


Of 6856 KOIs, 1243 (18.13%) are found to be outliers.



In [60]:
koi_list = list(np.genfromtxt('list_koi_confirmed.txt',delimiter=',',dtype=str)[:,0])
koi_sampler = make_sampler(koi_list)
Q4_koi = koi_sampler(Q4p.data)
print("""
Of {} KOIs, {} ({:05.2f}%) are found to be outliers.
""".format(len(Q4_koi),len(Q4_koi[Q4_koi.db_out==-1]),100*len(Q4_koi[Q4_koi.db_out==-1])/len(Q4_koi)))


Of 1399 KOIs, 94 (06.72%) are found to be outliers.



In [61]:
koi_list = list(np.genfromtxt('list_koi_candidate.txt',delimiter=',',dtype=str)[:,0])
koi_sampler = make_sampler(koi_list)
Q4_koi = koi_sampler(Q4p.data)
print("""
Of {} KOIs, {} ({:05.2f}%) are found to be outliers.
""".format(len(Q4_koi),len(Q4_koi[Q4_koi.db_out==-1]),100*len(Q4_koi[Q4_koi.db_out==-1])/len(Q4_koi)))


Of 1765 KOIs, 85 (04.82%) are found to be outliers.



In [63]:
Q4_koi.index[:10]

Index(['kplr001026957', 'kplr001161345', 'kplr001293379', 'kplr001429589',
       'kplr001431122', 'kplr001432789', 'kplr001575873', 'kplr001717722',
       'kplr001718958', 'kplr001721157'],
      dtype='object', name='KID')