In [6]:
import numpy as np
import pandas as pd
import requests

import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display, HTML

In [7]:

# Root endpoint of the exo.MAST REST API (version 0.1).
base_url = "https://exo.mast.stsci.edu/api/v0.1/"

# Endpoint under which the Kepler data-validation (DV) time series are served.
kepler_dv_url = "{}dvdata/kepler/".format(base_url)

In [39]:
class KOIObject:
    """Handle for the Kepler data-validation (DV) products of one threshold crossing event (TCE).

    An instance is identified by a Kepler Input Catalog id and a TCE index.
    Constructing it only queries the list of TCEs for that object; the
    light-curve table and the transit metadata are fetched on demand with
    load_data(), load_metadata() or total_initialize().
    """

    # Seconds to wait on the exo.MAST service before a request is abandoned.
    REQUEST_TIMEOUT = 30

    # Converts the transit duration (hours, per the pipeline notes) into the
    # units of the PHASE column (days).
    HOURS_PER_DAY = 24.0

    def __init__(self, KICID, tce_input=1):
        """Create the handle and count the TCEs known for this object.

        Parameters
        ----------
        KICID : int or str
            Kepler Input Catalog identifier of the target.
        tce_input : int, optional
            TCE planetary number as used in the Kepler cumulative table.
            Defaults to the first threshold crossing event (1).

        Notes
        -----
        The constructor performs one HTTP request (via list_all_TCE()).
        """
        self.kicid = KICID

        # Corresponds to the TCE planetary number in the Kepler cumulative table.
        self.tce_index = tce_input

        # The service answers with a mapping such as {'TCE': ['TCE_1', ...]};
        # count the listed events rather than the keys of that mapping.
        tce_listing = self.list_all_TCE()
        if isinstance(tce_listing, dict):
            tce_names = tce_listing.get('TCE', [])
        else:
            tce_names = tce_listing
        self.total_TCE_num = len(tce_names)

        # Full table of data-validated light curves for this TCE: the initial
        # corrected light curve plus whitened and median-detrended versions.
        # Stays None until load_data() is called.
        self.full_datatable = None

        # Metadata extracted by the Kepler auto-fitting pipeline. The fit is
        # rudimentary and many values differ from the cumulative table, but
        # period, duration and depth are generally reliable; the period agrees
        # almost exactly with the cumulative table and can serve for record
        # linkage. All stay None until load_metadata() is called.
        self.period = None
        self.duration = None
        self.depth = None

    #----------------------------INTERNAL HELPERS----------------------------------------------------------

    def _get_json(self, url):
        """GET `url` and return the decoded JSON body, raising on HTTP errors or timeouts."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _tce_url(self, resource):
        """Build the DV endpoint URL of `resource` ('table' or 'info') for this KIC/TCE pair."""
        return "{}{}/{}/?tce={}".format(kepler_dv_url, self.kicid, resource, self.tce_index)

    def _require_data(self):
        """Return the loaded light-curve table or fail with an actionable message."""
        if self.full_datatable is None:
            raise RuntimeError("Light-curve table is not loaded; call load_data() first.")
        return self.full_datatable

    #----------------------------DATA LOAD METHODS FOR GIVEN TCE------------------------------------------

    def total_initialize(self):
        """Load both the light-curve table and the metadata (two HTTP requests, can take a while).

        Returns
        -------
        KOIObject
            This instance, so calls can be chained like the other loaders.
        """
        return self.load_data().load_metadata()

    def load_data(self):
        """Download the DV light-curve table of this TCE into `full_datatable`.

        The curves live under the 'data' key of the JSON answer and are
        converted to a pandas DataFrame.

        Returns
        -------
        KOIObject
            This instance (for chaining).
        """
        lightcurve_json = self._get_json(self._tce_url('table'))
        self.full_datatable = pd.DataFrame(lightcurve_json['data'])
        return self

    def load_metadata(self):
        """Download the DV header of this TCE and store period, duration and depth.

        Reads TPERIOD, TDUR and TDEPTH from the 'DV Data Header' section.

        Returns
        -------
        KOIObject
            This instance (for chaining).
        """
        header = self._get_json(self._tce_url('info'))['DV Data Header']

        self.period = header['TPERIOD']
        self.duration = header['TDUR']
        self.depth = header['TDEPTH']

        return self

    #-------------------------------TIME SERIES PROCESSING-------------------------------------------

    # Some basic processing is needed to see the transit clearly.
    # The median-detrended light curve (LC_DETREND) is used throughout.

    def phase_folded(self):
        """Return the detrended flux averaged per distinct phase value.

        Returns
        -------
        pandas.Series
            LC_DETREND means indexed by PHASE in ascending order.

        Raises
        ------
        RuntimeError
            If load_data() has not been called yet.
        """
        table = self._require_data()
        # Aggregate only the column that is returned, so unrelated (possibly
        # non-numeric) columns of the table never take part in the mean.
        return table.groupby('PHASE')['LC_DETREND'].mean().sort_index()

    def phase_binned(self, bin_width=None):  # bin width in days
        """Average the phase-folded light curve in equal-width phase bins.

        Parameters
        ----------
        bin_width : float, optional
            Bin width in the units of PHASE (days). Defaults to the mean
            spacing of the TIME column, i.e. the sampling interval.

        Returns
        -------
        pandas.Series
            Mean LC_DETREND per bin, indexed by the float midpoint of each
            bin (index name 'phase_mid'). Bins without samples hold NaN.

        Raises
        ------
        RuntimeError
            If load_data() has not been called yet.
        ValueError
            If the bin width is not a positive finite number or there are no
            phase samples to bin.
        """
        table = self._require_data()

        if bin_width is None:
            # Use the sampling interval as default bin width.
            bw = table['TIME'].diff().mean()
        else:
            bw = bin_width
        if not np.isfinite(bw) or bw <= 0:
            raise ValueError("bin width must be a positive finite number, got {!r}".format(bw))

        phased = self.phase_folded()
        if phased.empty:
            raise ValueError("no phase samples available to bin")

        phase_range = phased.index.max() - phased.index.min()
        # At least one bin, so a light curve spanning less than one bin width
        # (or a single phase value) is still binned instead of failing in pd.cut.
        n_bins = max(1, int(round(phase_range / bw)))

        bin_cat = pd.cut(phased.index.to_numpy(), n_bins)
        bin_mids = np.asarray(bin_cat.categories.mid, dtype=float)  # one midpoint per bin, ascending
        sample_mids = bin_mids[bin_cat.codes]                       # midpoint of each sample's bin

        # Average per bin, then reindex over every bin so that empty bins are
        # kept as NaN rows (as a categorical group-by would report them).
        binned = phased.groupby(sample_mids).mean().reindex(bin_mids)
        binned.index = pd.Index(bin_mids, name='phase_mid')

        return binned

    def phase_binned_CS(self, window_mult=None):
        """Return a centred close-up of the binned light curve around the primary transit.

        CS stands for centred/short. The slice spans phases
        [-delta, +delta] with delta = window_mult * 2 * duration (in days),
        so the default window is four transit durations wide.

        Parameters
        ----------
        window_mult : float, optional
            Multiplier applied to the default half-window. Defaults to 1.

        Returns
        -------
        pandas.Series
            The part of phase_binned() whose bin midpoints fall in the window.

        Raises
        ------
        RuntimeError
            If load_metadata() or load_data() has not been called yet.
        """
        if self.duration is None:
            raise RuntimeError("Transit duration is not loaded; call load_metadata() first.")

        multiplier = 1 if window_mult is None else window_mult
        # Duration is in hours; convert to days to match the phase axis.
        delta_phase = multiplier * 2 * self.duration / self.HOURS_PER_DAY

        return self.phase_binned().loc[-delta_phase:delta_phase]

    #---------------------METHODS FOR LISTING ALL TCES FOR Kepler Object-----------------------

    def list_all_TCE(self):
        """Return the raw TCE listing of this Kepler catalog object (KIC).

        Returns
        -------
        dict
            The decoded JSON answer of the '/tces/' endpoint, e.g.
            {'TCE': ['TCE_1', 'TCE_2']}.
        """
        return self._get_json("{}{}/tces/".format(kepler_dv_url, self.kicid))

    #---------------------PLOTTING FUNCTIONS----------------------------------------------------

    def plot_detrended_lc(self):
        """Scatter-plot the median-detrended light curve against time and show the figure.

        Raises
        ------
        RuntimeError
            If load_data() has not been called yet.
        """
        table = self._require_data()
        sns.scatterplot(x='TIME', y='LC_DETREND', data=table, s=2, label='Detrended LC')
        plt.ylabel('Relative Flux')
        plt.xlabel('Time (Barycentric Julian Days)')
        plt.title('Median Detrended Light Curve')
        plt.legend()
        plt.show()
    

    
    
    

In [40]:
# Handle for TCE 2 of KIC 8394721. The constructor calls list_all_TCE(), so this cell issues a network request.
m = KOIObject(8394721, 2)

In [41]:
# Raw TCE listing for this KIC, exactly as decoded from the '/tces/' endpoint.
m.list_all_TCE()

{'TCE': ['TCE_1', 'TCE_3', 'TCE_2', 'TCE_4']}

In [42]:
# Download the DV light-curve table of the selected TCE into m.full_datatable; the call returns m itself.
m.load_data()

<__main__.KOIObject at 0x24c113cb7c0>

In [43]:
# Read TPERIOD, TDUR and TDEPTH from the DV header into m.period, m.duration and m.depth; the call returns m itself.
m.load_metadata()

<__main__.KOIObject at 0x24c113cb7c0>

In [45]:
# m.plot_detrended_lc()

In [46]:
# m.phase_binned().plot()

In [323]:
# Transit period taken from the DV header field TPERIOD (presumably in days; TODO confirm units).
m.period

13.484546998744758

In [139]:
# Mean spacing between consecutive TIME samples; phase_binned() uses this value as its default bin width.
m.full_datatable['TIME'].diff().mean()

0.0204335818138457