In [1]:
import warnings 
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import math
from tqdm.notebook import tqdm
from astropy.io import fits
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [3]:
# Calculate stats for FFI

ffi = pd.read_csv("../data/Kepler_FFI.csv")

# calculate stats for each star

ffi['Mean_Flux'] = None
ffi['Standard_Deviation'] = None
ffi['Average_Error'] = None
ffi['Jitter'] = None

for j in range(1, 53):
    ffi['sigma_' + str(j)] = None

for i in tqdm(ffi.index):
    mean = np.mean(ffi.iloc[i, 1:53])
    ffi.at[i, 'Mean_Flux'] = mean
    
    err = np.median(ffi.iloc[i, 57:109])
    ffi.at[i, 'Average_Error'] = err
    
    stdev = np.std(ffi.iloc[i, 1:53])
    
    # Update stdev according to outliers in first 9 measurements
    missing = []
    for j in range(1, 9):
        sigma_n = (ffi.iloc[i, j] - mean) / stdev
        if sigma_n < -3:
            missing.append(j)
    drop = ffi
    for k in missing:
        drop = drop.drop(drop.columns[k], axis=1).iloc[i, 123:len(drop.columns) - 2]
    stdev = np.std(drop)
    
    ffi.at[i, 'stdev'] = stdev
    
    for j in range(1, 53):
        sigma_n = (ffi.iloc[i, j] - mean) / stdev
        ffi.at[i, 'sigma_' + str(j)] = sigma_n

    if (stdev**2 - err**2) > 0:
        jitter = math.sqrt(stdev**2 - err**2)
        ffi.at[i, 'Jitter'] = jitter
    
    ffi['KIC'] = ffi['KIC'].astype(int) 

ffi.to_csv("Kepler_FFI_Stats.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: '../data/Kepler_FFI.csv'

In [7]:
# Import and clean data

def clean(cpcb):
    cpcb = cpcb.replace(to_replace=-1.0, value=np.nan)
    cpcb = cpcb.replace(to_replace=-999, value=np.nan)
    cpcb = cpcb.replace(to_replace='NaN', value=np.nan)
    cpcb = cpcb.replace(to_replace='--', value=np.nan)
    cpcb = cpcb.replace(to_replace=np.inf, value=np.nan)
    cpcb = cpcb.replace(to_replace=-np.inf, value=np.nan)
    cpcb = cpcb.dropna() 
    return cpcb

cpcb = pd.read_csv("../Raw_Data/ROOSTER_Data/kepler_km_full_flag.csv")
cpcb = cpcb.iloc[:, :2]
cpcb.rename(columns = {'Unnamed: 0':'KIC', 'cpcb1_flag':'cpcb'}, inplace = True)
cpcb = clean(cpcb)

eb = pd.read_csv("eb.csv")
eb = eb[['KIC']]
eb['eb'] = 1.0

e1 = pd.read_csv("errors1.csv")
e2 = pd.read_csv("errors2.csv")
e3 = pd.read_csv("errors3.csv")
e4 = pd.read_csv("errors4.csv")
p1 = pd.read_csv("params1.csv")
p2 = pd.read_csv("params2.csv")
p3 = pd.read_csv("params3.csv")
p4 = pd.read_csv("params4.csv")

errors = pd.concat([e1, e2, e3, e4], ignore_index=True)

df = pd.merge(cpcb, ffi, how='right', on='KIC')
df = pd.merge(eb, df, how='right', on='KIC')
df.replace(to_replace=np.nan, value=0.0, inplace=True)

df = pd.merge(errors, df, how='inner', on='KIC')

params = pd.concat([p1, p2, p3, p4], ignore_index=True)

for i in tqdm(params.index):
    for col in params.columns[1:]:
        lst = params[col][i].strip('[] ,').replace(',', '').split()
        lst = [float(j) for j in lst]
        params.at[i, col] = lst

obs = pd.read_csv("obs_info.txt", sep=' ', names=["Time", "Orientation", "Year"])

  0%|          | 0/155070 [00:00<?, ?it/s]

In [8]:
df.to_csv("Kepler_FFI_Fits.csv", index=False)
params.to_csv("Kepler_FFI_Params.csv", index=False)