In [None]:
# Interpret and visualize variability types / labelling accuracy

In [1]:
import warnings 
warnings.filterwarnings("ignore")
import numpy as np
from astropy.io import fits
from astropy.time import Time
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import pandas as pd
import sys
import sklearn
import scipy
import math
import ast
from tqdm.notebook import tqdm
pd.set_option('display.max_columns', None)

In [None]:
# Load the flux, error and fit-parameter tables. Each table is stored as
# three CSV parts (<name>1.csv, <name>2.csv, <name>3.csv) under ../output/.

def load_parts(name, n_parts=3):
    """Read ../output/<name>1.csv ... <name><n_parts>.csv and stack them row-wise.

    Parameters
    ----------
    name : str
        File stem, e.g. 'flux', 'errors' or 'params'.
    n_parts : int, optional
        Number of numbered part files to read (default 3).

    Returns
    -------
    pandas.DataFrame
        All parts concatenated in part order with a fresh 0..n-1 index.
    """
    parts = [pd.read_csv("../output/" + name + str(part) + ".csv")
             for part in range(1, n_parts + 1)]
    return pd.concat(parts, ignore_index=True)


def parse_float_list(text):
    """Convert a stringified list such as "[1.0, 2.5]" into a list of floats.

    Surrounding brackets, spaces and commas are stripped, remaining commas
    are removed, and the rest is split on whitespace.
    """
    return [float(token) for token in text.strip('[] ,').replace(',', '').split()]


flux = load_parts('flux')
err = load_parts('errors')
params = load_parts('params')

# Drop rows whose KIC is 0.
# NOTE(review): presumably these are unfilled placeholder rows - confirm.
flux = flux[flux['KIC'] != 0]

# Drop rows in which every column at positions 109..164 equals zero.
# NOTE(review): the positional range is taken as-is from the original code;
# confirm which columns it is meant to cover.
all_zero = (flux.iloc[:, 109:165] == 0).all(axis=1)
flux = flux[~all_zero]
flux = flux.drop_duplicates(subset='KIC', keep="first")

# Keep only the error / parameter rows that still have a flux row,
# one row per KIC.
err = err[err['KIC'].isin(flux['KIC'])]
err = err.drop_duplicates(subset='KIC', keep="first")

params = params[params['KIC'].isin(flux['KIC'])]
params = params.drop_duplicates(subset='KIC', keep="first").copy()

# Every column after the first holds lists serialized as text; parse them
# one whole column at a time instead of cell by cell.
# NOTE(review): assumes column 0 of params is 'KIC' - confirm.
for col in tqdm(params.columns[1:]):
    params[col] = params[col].map(parse_float_list)

params['KIC'] = params['KIC'].astype(int)

In [None]:
# Inner-join the flux table with the error table on KIC and store the
# identifier as an integer.
df = flux.merge(err, how='inner', on='KIC').astype({'KIC': int})

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/127153 [00:00<?, ?it/s]

In [5]:
# Sine-curve model.
def func(x, a, b, c, d):
    """Evaluate a * sin(b * x + c) + d.

    x is the input value (scalar or numpy array); a scales the sine,
    b multiplies x inside the sine, c is added inside the sine and d is
    added to the result.
    """
    phase = b * x + c
    return a * np.sin(phase) + d

# Straight-line model.
def straightLine(x, m, b):
    """Evaluate m * x + b (slope m, intercept b)."""
    slope_term = m * x
    return slope_term + b

# Sine-curve model tilted by a linear trend.
def func_tilt(x, a, b, c, d, e):
    """Evaluate a * sin(b * x + c) + d + e * x.

    Same sine term and offset as func(), plus a linear term with slope e.
    """
    wave = a * np.sin(b * x + c)
    trend = e * x
    return wave + d + trend

# Flat-line (constant) model.
def flatLine(x, b):
    """Evaluate 0 * x + b: the constant b, shaped like x."""
    zeroed = 0 * x
    return zeroed + b

In [6]:
# Visualize recorded light curves from FFI data and various fits

# Epochs of the 52 data points, in chronological order. They are passed to
# astropy below with format='mjd'.
# NOTE(review): the name `abs` shadows the built-in abs() for the rest of the
# kernel session. It is left unchanged here because other cells (and possibly
# notebooks run via %run) may read this global - consider renaming everywhere.
abs = [54945.74206,54945.8670833,54946.0182163,54946.2335259,54946.3377372,54946.5495655,54946.7334679,
         54947.1662509,55062.8253608,55091.0055606,55123.0864583,55153.9797114,55182.0366329,55215.9548927,
         55216.0352649,55245.7660019,55274.7398619,55307.5350333,55336.4281441,55370.695297,55399.0571196,
         55430.8109392,55461.8291274,55492.8064711,55522.7621353,55552.0843583,55585.5760016,55614.7389024,
         55677.4444762,55706.6440223,55738.4591443,55769.477399,55801.7624176,55832.8010616,55864.8001316,
         55895.757043,55930.8619526,55958.4268791,55986.5230115,56014.5579184,56047.5173675,56077.4525238,
         56105.5895974,56137.5273556,56168.8315615,56203.8547584,56236.834641,56267.9141396,56303.6729488,
         56330.563549,56357.495041,56390.4952381]
# Epochs shifted by a constant 54833; visualize() evaluates the fitted models
# on this axis.
# NOTE(review): presumably the fits were made against this shifted time axis -
# confirm against the fitting code.
times = np.asarray(abs) - 54833

# The same epochs as astropy Time objects, converted to decimal years;
# visualize() uses these as the plot's x axis.
time = Time(abs, format='mjd')
normalized = time.decimalyear

# String form of each epoch; visualize() uses these as column labels to pick
# the flux values out of a df row.
cols = [str(i) for i in abs]

def visualize(kid, y_min = None, y_max = None):
    """Plot one star's light curve with its four fitted models and print fit statistics.

    Reads the notebook-level globals ``df``, ``params``, ``cols``, ``times``
    and ``normalized``.

    Parameters
    ----------
    kid : int
        KIC identifier to look up in ``df`` and ``params``.
    y_min, y_max : int or float, optional
        Lower / upper limit of the y axis. Each limit is applied on its own;
        ``None`` (or any non-numeric value) leaves that side autoscaled.
        Python and numpy integer/float scalars are accepted.

    Returns
    -------
    None
        A message is printed and nothing is drawn when ``kid`` is missing
        from ``df`` or from ``params``.
    """
    if kid not in df['KIC'].values:
        print("KIC does not have valid FFI data.")
        return

    params_match = params[params['KIC'] == kid]
    if params_match.empty:
        # Previously this case raised an IndexError from .iloc[0].
        print("KIC does not have fitted model parameters.")
        return

    df_row = df[df['KIC'] == kid].iloc[0]
    params_row = params_match.iloc[0]

    x_plot = normalized
    y_plot = df_row[cols]

    # NOTE(review): positional slices kept from the original code. They are
    # presumed to select the 52 per-epoch uncertainties (57:109) and the 52
    # per-epoch sigma deviations (110:162) - confirm against df's column order.
    y_err = df_row.iloc[57:109]
    within_3_sigma = np.abs(df_row.iloc[110:162].to_numpy()) < 3

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.errorbar(x_plot, y_plot, yerr=y_err, lw = 2, capsize = 4, capthick = 1, linestyle = '')
    # Colour encodes whether a point lies within 3 sigma.
    scatter = ax.scatter(x_plot, y_plot, c=within_3_sigma, cmap='PiYG')

    # Models are evaluated on the shifted-time axis (`times`) but drawn
    # against decimal years (`normalized`); both grids have 500 points.
    line_inp = np.linspace(times[0], times[-1], 500)
    line_plot = np.linspace(normalized[0], normalized[-1], 500)

    flat, = ax.plot(line_plot, flatLine(line_inp, params_row['flatLineParam']))
    straight, = ax.plot(line_plot, straightLine(line_inp, *params_row['straightLineParam'][:2]))
    curve, = ax.plot(line_plot, func(line_inp, *params_row['curveParamL2'][:4]))
    curtil, = ax.plot(line_plot, func_tilt(line_inp, *params_row['curtilParamL2'][:5]))

    ax.set_xlabel('Year')
    ax.set_ylabel('Relative Flux')

    # Accept numpy scalars as well as int/float, and let either limit be
    # given without the other. bool is excluded, as in the original check.
    numeric = (int, float, np.integer, np.floating)
    lower = y_min if isinstance(y_min, numeric) and not isinstance(y_min, bool) else None
    upper = y_max if isinstance(y_max, numeric) and not isinstance(y_max, bool) else None
    if lower is not None or upper is not None:
        ax.set_ylim(bottom=lower, top=upper)

    ax.legend([flat, straight, curve, curtil], ['Flat Line', 'Straight Line', 'Curve', 'Tilted Curve'], loc='best')

    print("-- KIC: " + str(kid) + " --")
    plt.show()
    print("CP/CB: " + str(df_row['cpcb']))
    print("EB: " + str(df_row['eb']))
    print("Flat line L2 error: " + str(df_row['flatLineErrorL2']))
    print("Straight line L2 error: " + str(df_row['straightLineErrorL2']))
    print("Curve L2 error: " + str(df_row['curveErrorL2']))
    print("Tilted curve L2 error: " + str(df_row['curtilErrorL2']))
    print("Standard deviation: " + str(df_row['stdev']))
    print("Number of outliers: " + str(df_row['n_outliers']) + "\n")

In [None]:
# NOTE(review): improvement() is not defined anywhere above this cell;
# presumably it comes from classify_types.ipynb, which is only %run further
# down - confirm and move that %run above this cell so Run All works.
imp = improvement(df, 'straightLineErrorL2', 'straightLineParam', 'curtilErrorL2', 'curtilParamL2')

# Plot straight-line vs tilted-curve L2 fit error, coloured by the EB flag

data = imp

x_plot = data['straightLineErrorL2']
y_plot = data['curtilErrorL2']

fig, ax = plt.subplots(figsize=(8, 8))

scatter = ax.scatter(x_plot, y_plot, c=data['eb'], cmap='viridis', s=2)
# The colour comes from the 'eb' column, so the legend is titled to match
# (it previously read "CP/CB Flag").
ax.legend(handles=scatter.legend_elements()[0],
          labels=[0.0, 1.0],
          title="EB Flag")
# Axis labels now name the quantities actually plotted (they previously read
# "Flat Line Error L1" / "Straight Line Error L1").
ax.set_xlabel('Straight Line Error L2')
ax.set_ylabel('Tilted Curve Error L2')

ax.set_xlim(0, 5000)
ax.set_ylim(0, 5000)

plt.show()

In [None]:
# Print proportions of data that fit one curve better than another
#
# For every ordered pair of distinct models, improvement() selects a subset
# of df and the EB / CPCB shares of that subset are printed. The printed
# values are fractions in [0, 1] even though the labels say "percentage".

err_labels = ['curveErrorL2',
        'straightLineErrorL2',
        'curtilErrorL2',
       'flatLineErrorL2']

par_labels = ['curveParamL2',
        'straightLineParam',
        'curtilParamL2',
        'flatLineParam']


def safe_fraction(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Totals over the whole data set do not change inside the loops.
n_eb_total = len(df[df['eb'] == 1])
n_cpcb_total = len(df[df['cpcb'] == 1])

fits = list(zip(err_labels, par_labels))
for i, (err_a, par_a) in enumerate(fits):
    for j, (err_b, par_b) in enumerate(fits):
        if i == j:
            continue
        combo = improvement(df, err_a, par_a, err_b, par_b)
        n_eb = len(combo[combo['eb'] == 1])
        n_cpcb = len(combo[combo['cpcb'] == 1])
        print(err_a)
        print(err_b)
        print("df length: " + str(len(combo)))
        # An empty subset (or a data set with no flagged stars) prints nan
        # instead of raising ZeroDivisionError.
        print("percentage eb: " + str(safe_fraction(n_eb, n_eb_total)))
        print("percentage eb of data: " + str(safe_fraction(n_eb, len(combo))))
        print("percentage cpcb: " + str(safe_fraction(n_cpcb, n_cpcb_total)))
        print("percentage cpcb of data: " + str(safe_fraction(n_cpcb, len(combo))) + "\n")

In [None]:
# Find percentage of EBs for some # stdevs away (3+)
#
# For each threshold i in -3, -4, ..., -7: collect the rows whose value is
# below i in any column from position 123 onward, then report how many of
# the collected rows are EBs.
# NOTE(review): a row that is below the threshold in several columns is
# collected once per column, so counts may include duplicates (behaviour
# kept from the original) - confirm whether de-duplication is wanted.
# NOTE(review): the start position 123 is taken as-is; confirm it matches
# the intended sigma columns.

n_eb_total = len(df[df['eb'] == 1])

for i in range(-3, -8, -1):
    # Build all pieces first and concatenate once; DataFrame.append was
    # removed in pandas 2.0 and re-copied the frame on every call.
    below = [df[df.iloc[:, j] < i] for j in range(123, len(df.columns))]
    repl = pd.concat(below, ignore_index=True) if below else pd.DataFrame(columns=df.columns)
    n_eb_found = len(repl[repl['eb'] == 1])
    # Print nan instead of raising ZeroDivisionError when a denominator is 0.
    print('percent eb: ' + str(n_eb_found / n_eb_total * 100 if n_eb_total else float('nan')))
    print('percent correct: ' + str(n_eb_found / len(repl) * 100 if len(repl) else float('nan')))
    print(len(repl))
    print()

In [None]:
# Remove outlying errors > 20000
#
# Starting from a deep copy of df, drop every row whose value exceeds 20000
# in any of the four L2 error columns; the remaining rows are displayed.

l2 = ['flatLineErrorL2', 'straightLineErrorL2', 'curveErrorL2', 'curtilErrorL2']

rem = df.copy(deep=True)
for err_col in l2:
    oversized = rem.index[(rem[err_col] > 20000).to_numpy()]
    rem = rem.drop(index=oversized)
rem

In [None]:
# Count rows of the data set that have large uncertainty measurements
#
# For each threshold, collect the rows whose value exceeds it in any column
# at positions 67..118 and print the threshold followed by the row count.
# NOTE(review): a row exceeding the threshold in several columns is counted
# once per column (behaviour kept from the original).
# NOTE(review): the positional range 67:119 is taken as-is; confirm it
# matches the uncertainty columns of df.

vals = [0.1, 0.5, 1]

for val in vals:
    # big uncertainties: build every piece, then concatenate once
    # (DataFrame.append was removed in pandas 2.0).
    flagged = [df[df.iloc[:, i] > val] for i in range(67, 119)]
    weird = pd.concat(flagged, ignore_index=True)

    print(val)
    print(len(weird))
    print()

In [52]:
# Execute the companion notebook. The functions called below (long_term,
# mid_term, short_term, non_var, eb) are not defined in this notebook, so
# presumably they come from it - confirm.
%run classify_types.ipynb

# Optional: restrict df to the subset returned by good() (currently disabled).
# df = good()

# Size of each variability class, then the EB count from the 'eb' column,
# the EB count from eb(), and the total number of rows in df.
print(len(long_term()))
print(len(mid_term()))
print(len(short_term()))
print(len(non_var()))
print(len(df[df['eb'] == 1]))
print(len(eb()))
print(len(df))
# Fraction of df classified as non-variable.
# NOTE(review): the original note "nonvar 60-80%" presumably states the
# expected range - confirm its source.
print(len(non_var()) / len(df))

11389
5147
8737
100010
1870
1870
127153
0.7865327597461326


In [48]:
# Mean of the tilted-curve L2 fit error over all rows of df.
df['curtilErrorL2'].mean()

145075177972694.94

In [192]:
# Keep the result of short_term() for the inspection cells that follow.
st = short_term()

In [193]:
# Show the row(s) of st for KIC 757099.
st[st['KIC'] == 757099]

Unnamed: 0,KIC,54945.74206,54945.8670833,54946.0182163,54946.2335259,54946.3377372,54946.5495655,54946.7334679,54947.1662509,55062.8253608,55091.0055606,55123.0864583,55153.9797114,55182.0366329,55215.9548927,55216.0352649,55245.7660019,55274.7398619,55307.5350333,55336.4281441,55370.695297,55399.0571196,55430.8109392,55461.8291274,55492.8064711,55522.7621353,55552.0843583,55585.5760016,55614.7389024,55677.4444762,55706.6440223,55738.4591443,55769.477399,55801.7624176,55832.8010616,55864.8001316,55895.757043,55930.8619526,55958.4268791,55986.5230115,56014.5579184,56047.5173675,56077.4525238,56105.5895974,56137.5273556,56168.8315615,56203.8547584,56236.834641,56267.9141396,56303.6729488,56330.563549,56357.495041,56390.4952381,uncert_new_0,uncert_new_1,uncert_new_2,uncert_new_3,mod_unc_new_00,mod_unc_new_01,mod_unc_new_02,mod_unc_new_03,mod_unc_new_04,mod_unc_new_05,mod_unc_new_06,mod_unc_new_07,mod_unc_new_08,mod_unc_new_09,mod_unc_new_10,mod_unc_new_11,mod_unc_new_12,mod_unc_new_13,mod_unc_new_14,mod_unc_new_15,mod_unc_new_16,mod_unc_new_17,mod_unc_new_18,mod_unc_new_19,mod_unc_new_20,mod_unc_new_21,mod_unc_new_22,mod_unc_new_23,mod_unc_new_24,mod_unc_new_25,mod_unc_new_26,mod_unc_new_27,mod_unc_new_28,mod_unc_new_29,mod_unc_new_30,mod_unc_new_31,mod_unc_new_32,mod_unc_new_33,mod_unc_new_34,mod_unc_new_35,mod_unc_new_36,mod_unc_new_37,mod_unc_new_38,mod_unc_new_39,mod_unc_new_40,mod_unc_new_41,mod_unc_new_42,mod_unc_new_43,mod_unc_new_44,mod_unc_new_45,mod_unc_new_46,mod_unc_new_47,mod_unc_new_48,mod_unc_new_49,mod_unc_new_50,mod_unc_new_51,stdev,sigma_0,sigma_1,sigma_2,sigma_3,sigma_4,sigma_5,sigma_6,sigma_7,sigma_8,sigma_9,sigma_10,sigma_11,sigma_12,sigma_13,sigma_14,sigma_15,sigma_16,sigma_17,sigma_18,sigma_19,sigma_20,sigma_21,sigma_22,sigma_23,sigma_24,sigma_25,sigma_26,sigma_27,sigma_28,sigma_29,sigma_30,sigma_31,sigma_32,sigma_33,sigma_34,sigma_35,sigma_36,sigma_37,sigma_38,sigma_39,sigma_40,sigma_41,sigma_42,sigma_43,sigma_44,sigma_45,sigma_46,sigma_47,sigma
_48,sigma_49,sigma_50,sigma_51,Mean_Flux,Average_Error,Jitter,n_outliers,cpcb,eb,curveErrorL1,curveErrorL2,straightLineErrorL1,straightLineErrorL2,curtilErrorL1,curtilErrorL2,flatLineErrorL1,flatLineErrorL2
1,757099,1.013488,1.010054,0.997953,1.009844,0.998879,0.995125,0.995859,1.013546,0.994246,1.00519,1.010356,0.991016,0.981155,1.019919,0.985755,0.990782,0.984506,0.989966,0.978437,1.020685,0.998982,0.979769,1.023834,1.01521,0.981394,1.004319,1.002591,0.998362,0.994717,1.001557,0.998443,1.004295,1.008772,1.014491,1.052515,1.053241,1.001782,0.982918,1.03927,1.018924,1.016688,1.037179,0.971693,0.950573,0.984871,1.0,0.989164,0.993826,0.998218,1.002691,1.001638,0.977546,0.017941,0.022807,0.017341,0.018925,0.000723,0.000715,0.000919,0.000625,0.000884,0.000879,0.000911,0.001159,0.002098,0.002131,0.001822,0.001758,0.00171,0.002068,0.001924,0.00136,0.002272,0.001424,0.00132,0.002431,0.001815,0.000849,0.001832,0.001526,0.000781,0.00093,0.001623,0.000787,0.001371,0.001691,0.002505,0.002209,0.000748,0.00141,0.001806,0.000831,0.001305,0.001678,0.001154,0.002226,0.002639,0.002979,0.003015,0.002902,0.001108,0.001535,0.002761,0.001472,0.002145,0.002168,0.001631,0.002797,0.018864,0.627111,0.445046,-0.196412,0.433934,-0.147315,-0.346349,-0.307448,0.63016,-0.392904,0.187204,0.461083,-0.564152,-1.086876,0.968014,-0.843042,-0.576544,-0.909273,-0.619804,-1.230959,1.008592,-0.141863,-1.160364,1.175521,0.718365,-1.074221,0.141056,0.049461,-0.174749,-0.367959,-0.005381,-0.170437,0.139745,0.377087,0.680239,2.695948,2.734399,0.006545,-0.993421,1.993816,0.915283,0.796704,1.882971,-1.58849,-2.708058,-0.889928,-0.087909,-0.662337,-0.415198,-0.182362,0.054742,-0.001069,-1.2782,1.001658,0.001627,0.018794,0.0,0.0,0.0,450.735444,7485.143984,498.7489,10366.045629,449.165266,7540.829843,498.566784,10357.555072


In [None]:
# Fraction of rows in st whose 'eb' (eclipsing binary) flag equals 1.
len(st[st['eb'] == 1]) / len(st)

In [None]:
# Show the row(s) of st for KIC 1026475, noted here as a cataclysmic binary:
# a white dwarf with an accretion disk and a more massive companion star.
st[st['KIC'] == 1026475]

In [47]:
# Fraction of rows in st with Jitter below 0.001.
# Open question: should Jitter < 0.001 be treated as non-variable?
len(st[st['Jitter'] < 0.001]) / len(st)

0.2429271563667919

In [50]:
# Show the rows of df whose tilted-curve L2 fit error exceeds 20000.
df[df['curtilErrorL2'] > 20000]

Unnamed: 0,KIC,54945.74206,54945.8670833,54946.0182163,54946.2335259,54946.3377372,54946.5495655,54946.7334679,54947.1662509,55062.8253608,55091.0055606,55123.0864583,55153.9797114,55182.0366329,55215.9548927,55216.0352649,55245.7660019,55274.7398619,55307.5350333,55336.4281441,55370.695297,55399.0571196,55430.8109392,55461.8291274,55492.8064711,55522.7621353,55552.0843583,55585.5760016,55614.7389024,55677.4444762,55706.6440223,55738.4591443,55769.477399,55801.7624176,55832.8010616,55864.8001316,55895.757043,55930.8619526,55958.4268791,55986.5230115,56014.5579184,56047.5173675,56077.4525238,56105.5895974,56137.5273556,56168.8315615,56203.8547584,56236.834641,56267.9141396,56303.6729488,56330.563549,56357.495041,56390.4952381,uncert_new_0,uncert_new_1,uncert_new_2,uncert_new_3,mod_unc_new_00,mod_unc_new_01,mod_unc_new_02,mod_unc_new_03,mod_unc_new_04,mod_unc_new_05,mod_unc_new_06,mod_unc_new_07,mod_unc_new_08,mod_unc_new_09,mod_unc_new_10,mod_unc_new_11,mod_unc_new_12,mod_unc_new_13,mod_unc_new_14,mod_unc_new_15,mod_unc_new_16,mod_unc_new_17,mod_unc_new_18,mod_unc_new_19,mod_unc_new_20,mod_unc_new_21,mod_unc_new_22,mod_unc_new_23,mod_unc_new_24,mod_unc_new_25,mod_unc_new_26,mod_unc_new_27,mod_unc_new_28,mod_unc_new_29,mod_unc_new_30,mod_unc_new_31,mod_unc_new_32,mod_unc_new_33,mod_unc_new_34,mod_unc_new_35,mod_unc_new_36,mod_unc_new_37,mod_unc_new_38,mod_unc_new_39,mod_unc_new_40,mod_unc_new_41,mod_unc_new_42,mod_unc_new_43,mod_unc_new_44,mod_unc_new_45,mod_unc_new_46,mod_unc_new_47,mod_unc_new_48,mod_unc_new_49,mod_unc_new_50,mod_unc_new_51,stdev,sigma_0,sigma_1,sigma_2,sigma_3,sigma_4,sigma_5,sigma_6,sigma_7,sigma_8,sigma_9,sigma_10,sigma_11,sigma_12,sigma_13,sigma_14,sigma_15,sigma_16,sigma_17,sigma_18,sigma_19,sigma_20,sigma_21,sigma_22,sigma_23,sigma_24,sigma_25,sigma_26,sigma_27,sigma_28,sigma_29,sigma_30,sigma_31,sigma_32,sigma_33,sigma_34,sigma_35,sigma_36,sigma_37,sigma_38,sigma_39,sigma_40,sigma_41,sigma_42,sigma_43,sigma_44,sigma_45,sigma_46,sigma_47,sigma
_48,sigma_49,sigma_50,sigma_51,Mean_Flux,Average_Error,Jitter,n_outliers,cpcb,eb,curveErrorL1,curveErrorL2,straightLineErrorL1,straightLineErrorL2,curtilErrorL1,curtilErrorL2,flatLineErrorL1,flatLineErrorL2
321,1430219,0.998777,0.998238,0.999569,0.995223,0.995335,0.994190,0.992815,0.993217,1.023491,0.981392,0.989215,0.963209,1.019010,0.924263,0.923979,1.004256,1.057660,0.977195,1.004551,0.989168,1.076513,1.016960,0.947603,1.000043,0.965728,1.013030,0.914845,0.995744,0.986228,1.013273,1.004559,1.066545,1.000000,0.929178,0.999957,0.968605,1.029412,0.945257,1.038665,1.089336,0.995449,1.025513,1.016788,1.058271,0.995731,0.917295,1.004426,0.965472,1.029727,0.937517,1.035149,1.098274,0.050955,0.023629,0.058846,0.011335,0.000829,0.000614,0.001060,0.000723,0.001030,0.000643,0.000714,0.000824,0.001956,0.001808,0.002043,0.001304,0.001915,0.002004,0.002036,0.001787,0.002616,0.001549,0.001419,0.002252,0.001561,0.000759,0.001780,0.001770,0.000798,0.001170,0.001702,0.001172,0.000997,0.001484,0.002111,0.001451,0.000658,0.001145,0.001837,0.001005,0.001145,0.001575,0.001347,0.002077,0.001819,0.002605,0.002740,0.001937,0.001168,0.001441,0.002451,0.001303,0.001749,0.002007,0.001715,0.002661,0.041140,0.014286,0.001178,0.033540,-0.072099,-0.069388,-0.097213,-0.130624,-0.120851,0.615015,-0.408298,-0.218142,-0.850262,0.506096,-1.796937,-1.803833,0.147469,1.445556,-0.510303,0.154627,-0.219279,1.903827,0.456253,-1.229604,0.045065,-0.789052,0.360725,-2.025857,-0.059446,-0.290738,0.366638,0.154832,1.661522,0.044012,-1.677460,0.042959,-0.719104,0.758926,-1.286642,0.983854,2.215509,-0.066604,0.664148,0.452070,1.460415,-0.059747,-1.966314,0.151583,-0.795261,0.766587,-1.474768,0.898370,2.432766,0.998189,0.001555,0.041111,0.0,0.0,0.0,816.837480,1.980246e+04,1002.384635,3.343326e+04,810.362774,2.006421e+04,967.034752,3.258251e+04
398,1433410,1.048911,1.064524,1.057709,1.030071,0.971263,1.058893,1.050066,1.020646,1.010013,1.002619,1.000890,0.988621,0.979878,1.087259,0.980716,0.975775,1.076471,0.972014,0.975424,0.966162,0.974959,1.003731,1.005427,0.986671,1.070222,1.075352,1.006643,0.977350,1.040886,0.978811,1.077416,0.993530,1.003565,1.000000,1.044561,0.999110,0.997072,1.086729,0.999378,1.018525,1.070327,0.979354,1.076572,0.912491,0.929326,0.998282,1.004325,1.080467,0.995445,1.000622,0.993402,0.979225,0.026338,0.034372,0.039255,0.038671,0.001202,0.000980,0.000985,0.001044,0.000888,0.000793,0.001033,0.000934,0.001548,0.002104,0.001860,0.001754,0.001593,0.002279,0.002484,0.001235,0.002570,0.001397,0.001671,0.002889,0.001969,0.000539,0.001578,0.001402,0.000967,0.000948,0.001792,0.000950,0.001285,0.001889,0.002907,0.002049,0.000570,0.001376,0.001538,0.000855,0.001399,0.001763,0.001122,0.002461,0.002108,0.002565,0.003598,0.002401,0.001468,0.001393,0.002088,0.001437,0.001782,0.001749,0.001355,0.002265,0.041069,0.873623,1.253798,1.087841,0.414878,-1.017075,1.116679,0.901746,0.185389,-0.073524,-0.253563,-0.295676,-0.594410,-0.807299,1.807374,-0.786906,-0.907200,1.544688,-0.998777,-0.915761,-1.141290,-0.927077,-0.226486,-0.185204,-0.641902,1.392535,1.517437,-0.155589,-0.868860,0.678214,-0.833291,1.567701,-0.474885,-0.230533,-0.317339,0.767702,-0.339002,-0.388642,1.794482,-0.332475,0.133737,1.395097,-0.820067,1.547146,-2.448136,-2.038225,-0.359179,-0.212030,1.642000,-0.428248,-0.302203,-0.478007,-0.823206,1.013033,0.001543,0.041040,0.0,0.0,1.0,1029.070878,3.327836e+04,1138.765671,3.917569e+04,1028.134580,3.296132e+04,1202.751873,4.185524e+04
419,1433980,0.867146,0.977559,1.003355,1.003966,0.998171,0.958203,0.999825,0.986828,1.002309,1.033100,1.010777,1.021393,0.994942,1.006036,1.008467,0.999788,0.995052,0.998918,1.001082,0.995002,1.000000,1.011083,0.977378,0.976334,1.024927,1.005058,1.012287,0.969689,0.980683,1.005322,1.006596,1.006375,0.990470,0.928291,0.983960,0.987874,0.976649,1.007750,0.979542,1.000212,1.007827,0.993569,1.006736,1.002040,0.922076,0.947734,1.017343,1.024822,0.993180,0.981941,0.994638,1.006133,0.026593,0.017472,0.012453,0.007846,0.000718,0.000842,0.000710,0.000698,0.000633,0.000820,0.000894,0.001018,0.001812,0.001781,0.001825,0.001993,0.001978,0.002267,0.002389,0.001138,0.001925,0.001495,0.001282,0.002456,0.001606,0.000734,0.001587,0.001397,0.001027,0.000948,0.001664,0.000789,0.002143,0.002593,0.003014,0.001758,0.001105,0.001162,0.001283,0.000908,0.000965,0.001451,0.001357,0.001921,0.003000,0.003311,0.004041,0.002100,0.001493,0.001510,0.001971,0.001116,0.001689,0.001855,0.001715,0.002220,0.027425,-4.557168,-0.531079,0.409533,0.431821,0.220519,-1.236876,0.280818,-0.193109,0.371373,1.494159,0.680150,1.067255,0.102746,0.507282,0.595933,0.279461,0.106780,0.247742,0.326651,0.104937,0.287196,0.691334,-0.537691,-0.575749,1.196133,0.471646,0.735210,-0.818053,-0.417186,0.481239,0.527716,0.519662,-0.060312,-2.327594,-0.297667,-0.154950,-0.564258,0.569773,-0.458789,0.294932,0.572597,0.052701,0.532824,0.361582,-2.554218,-1.618607,0.919572,1.192307,0.038506,-0.371287,0.091681,0.510824,0.992124,0.001549,0.027381,0.0,0.0,1.0,752.368839,3.872485e+04,793.116834,4.512542e+04,741.536080,3.869819e+04,792.744012,4.515549e+04
434,1434591,1.061254,1.061764,1.061200,1.060151,1.059514,1.058799,1.058061,1.056200,0.969496,1.001870,1.036248,1.031444,0.993960,0.966231,0.966140,0.992269,1.013575,1.061009,1.025787,0.970187,0.928617,0.959485,1.035050,1.012125,1.005667,0.981464,0.982231,1.048551,1.003734,0.976627,0.967389,1.060273,0.989184,0.997833,0.963599,1.018789,0.994333,1.022868,0.944867,1.032051,0.944690,1.031106,0.996266,1.037080,1.030553,1.000000,0.947328,1.022519,0.980691,0.994257,1.005743,1.033974,0.032502,0.022724,0.028669,0.031535,0.001200,0.001096,0.000985,0.000892,0.000974,0.001041,0.001075,0.001595,0.001750,0.001970,0.002005,0.002129,0.002150,0.001822,0.001667,0.001395,0.002128,0.001446,0.001567,0.002884,0.002162,0.000659,0.001224,0.001538,0.001277,0.001068,0.001195,0.000815,0.001962,0.001532,0.003105,0.002952,0.000607,0.001137,0.000920,0.000802,0.001363,0.001329,0.001468,0.002034,0.002285,0.003019,0.003337,0.002128,0.001446,0.001686,0.001788,0.001523,0.002086,0.001778,0.001356,0.001943,0.036595,1.435217,1.449154,1.433752,1.405078,1.387655,1.368135,1.347969,1.297116,-1.072188,-0.187523,0.751901,0.620608,-0.403689,-1.161422,-1.163906,-0.449903,0.132327,1.428528,0.466035,-1.053327,-2.189274,-1.345772,0.719150,0.092707,-0.083776,-0.745163,-0.724189,1.088077,-0.136606,-0.877342,-1.129789,1.408405,-0.534189,-0.297850,-1.233351,0.274788,-0.393494,0.386266,-1.745232,0.637209,-1.750062,0.611392,-0.340664,0.774629,0.596265,-0.238635,-1.677961,0.376737,-0.766283,-0.395580,-0.081690,0.689762,1.008733,0.001535,0.036562,0.0,0.0,0.0,846.806098,2.459788e+04,1085.617642,3.424598e+04,825.985610,2.310322e+04,1180.850026,4.306658e+04
463,1569822,1.006425,1.005080,1.003566,1.001521,1.000562,0.998206,0.996386,0.992100,1.000000,1.001592,1.001660,0.974758,0.968558,0.988382,0.988406,0.948817,1.016515,1.049398,0.955806,0.953924,1.040489,0.991316,1.002747,1.025817,0.989356,1.020955,1.012543,0.969286,1.049750,0.995981,1.045692,0.950723,0.932150,1.007687,0.991111,0.949740,1.006053,0.993363,1.026645,1.052081,1.004019,1.058959,0.962823,0.987549,1.049555,0.988072,0.998340,1.015157,1.021517,1.029322,1.006637,0.981891,0.032215,0.021530,0.024811,0.038202,0.000725,0.000625,0.000661,0.000570,0.000664,0.000626,0.000655,0.000843,0.001577,0.001824,0.002169,0.001711,0.002234,0.002444,0.002574,0.001793,0.002108,0.000851,0.001374,0.001585,0.001271,0.000744,0.001272,0.001429,0.000921,0.000820,0.001174,0.000873,0.001518,0.001625,0.002164,0.001085,0.000480,0.001105,0.001032,0.000789,0.001287,0.001080,0.001134,0.001700,0.002372,0.002205,0.002921,0.002045,0.001160,0.001518,0.002096,0.001111,0.001572,0.001546,0.001576,0.002272,0.028704,0.217803,0.170946,0.118214,0.046984,0.013551,-0.068536,-0.131912,-0.281254,-0.006021,0.049429,0.051811,-0.885412,-1.101415,-0.410776,-0.409951,-1.789155,0.569337,1.714947,-1.545687,-1.611228,1.404540,-0.308574,0.089682,0.893389,-0.376860,0.724019,0.430953,-1.076051,1.727212,-0.146041,1.585830,-1.722749,-2.369803,0.261771,-0.315698,-1.756999,0.204865,-0.237242,0.922267,1.808415,0.133998,2.048015,-1.301198,-0.439804,1.720405,-0.421592,-0.063854,0.522018,0.743610,1.015519,0.225199,-0.636920,1.000173,0.001331,0.028673,0.0,0.0,0.0,806.589630,3.395486e+04,965.409137,4.536088e+04,814.707618,3.209228e+04,929.934818,4.352743e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126683,10191056,1.007564,1.005533,0.843477,1.004909,1.007128,1.011577,1.011781,0.972977,0.989518,1.005381,1.013999,1.005957,0.996505,0.999696,1.000304,0.940214,0.970940,0.998531,1.002312,0.998368,0.998109,1.002733,0.893373,1.020428,1.002171,0.997829,0.997316,0.954810,1.005909,1.002238,0.996582,1.000700,0.833053,1.002500,0.893547,1.009107,0.996360,1.005677,1.009104,1.003472,1.006590,0.997089,1.001469,1.000000,1.002283,0.999553,1.018520,0.975943,0.992872,1.015857,1.011699,0.974534,0.055840,0.031796,0.021008,0.007031,0.000517,0.000550,0.000885,0.000640,0.000658,0.000824,0.000580,0.000684,0.001423,0.001599,0.001788,0.001448,0.001957,0.001097,0.001588,0.001536,0.001516,0.000577,0.000874,0.001087,0.001321,0.000676,0.000749,0.001108,0.001325,0.001185,0.001250,0.000753,0.000850,0.000863,0.001378,0.000992,0.000527,0.000767,0.001595,0.000648,0.000819,0.000987,0.000772,0.001118,0.001216,0.001472,0.001636,0.001325,0.000853,0.001203,0.001378,0.001263,0.001195,0.001101,0.000865,0.001378,0.001815,-1.247083,-0.865339,-0.881926,-0.559377,-0.656015,-0.637967,-0.690945,-0.246630,-1.070168,-0.803725,1.277959,0.535431,0.089083,-1.025796,-1.132443,-0.352424,-0.516370,1.102019,0.077703,-2.532187,-0.280714,-0.003289,0.044310,-1.107402,0.144683,-0.586914,-0.796280,0.441044,1.425285,0.010917,-2.057229,0.008694,0.615833,0.874088,-0.461644,0.560763,-0.465919,-0.919629,0.600143,0.984345,2.165164,1.210089,-0.823934,1.054239,1.869798,1.761327,-0.419361,0.757846,-0.000463,0.443385,1.353252,1.733772,0.999920,0.001140,0.001412,0.0,0.0,1.0,1343.235790,1.312248e+05,1465.052619,1.538269e+05,1357.788295,1.310727e+05,1457.452512,1.524659e+05
126685,10191142,0.963556,0.964351,0.964981,0.965836,0.966475,0.967391,0.968004,0.971596,1.056698,0.977148,0.993252,0.968475,0.978528,0.949534,0.948951,0.958982,0.977539,0.989157,0.964524,1.020715,1.000000,1.053604,1.049512,0.993535,1.037845,0.998376,0.913843,0.987632,1.018055,1.025359,1.010843,0.984123,0.937441,0.978081,1.019660,1.037356,1.035579,1.048710,1.012368,1.024804,0.975513,1.055572,0.988792,1.016076,1.013809,0.959996,1.001624,1.022869,0.972715,1.035496,1.080237,1.043908,0.034492,0.022421,0.025201,0.024717,0.000623,0.000499,0.000816,0.000676,0.000703,0.000733,0.000563,0.000572,0.001021,0.001396,0.001538,0.001299,0.001769,0.001046,0.001372,0.001678,0.001933,0.000789,0.000896,0.001156,0.001036,0.000758,0.000769,0.000939,0.000833,0.000960,0.001126,0.000950,0.000993,0.001004,0.001516,0.000868,0.000648,0.001015,0.000967,0.000689,0.000970,0.001195,0.001109,0.001229,0.001580,0.001662,0.001469,0.001382,0.001025,0.001007,0.001666,0.001283,0.001320,0.001529,0.001057,0.001347,0.008950,-0.336835,0.252800,0.120063,0.197002,0.104286,0.141239,-0.011348,0.071337,-1.305475,0.176619,0.077080,-0.567407,0.100276,1.583143,1.373556,-0.767639,-0.299652,0.019151,-0.504243,-0.100777,-2.959050,-0.943177,1.720270,-0.903453,0.641262,-0.188942,-0.304574,-0.135722,-0.359502,-1.081628,-0.917190,-1.188003,-2.034923,-0.040813,-1.741704,-0.158707,-0.413073,1.591210,1.091891,-1.364948,0.990246,0.138638,0.441784,0.698392,0.688996,1.560078,1.059642,1.737588,0.806346,1.962702,0.054095,-0.770910,1.000365,0.001193,0.008870,0.0,0.0,0.0,1147.697291,3.980616e+04,1207.168680,5.763675e+04,1057.714933,3.913423e+04,1643.092788,7.777024e+04
126828,10197260,0.969492,0.978168,0.989891,1.006314,1.013298,1.026474,1.036965,1.055449,0.987797,1.073919,1.014397,1.058071,1.010749,0.991602,0.998963,1.020545,0.978991,0.958004,0.993847,1.006153,1.101147,0.942073,1.080854,1.010495,0.961832,0.963276,1.041900,1.052424,1.052491,1.034712,0.981391,1.012695,0.961085,1.021813,0.960211,1.010731,0.986436,0.985185,0.983159,0.941608,1.007773,0.991986,0.979521,0.945567,0.976996,1.000000,0.943630,1.011964,0.989505,1.001037,1.025857,1.032720,0.045794,0.027455,0.029917,0.029147,0.000655,0.000641,0.000562,0.000742,0.000625,0.000704,0.000817,0.000575,0.001418,0.001632,0.001310,0.001145,0.001293,0.001281,0.001163,0.001774,0.001885,0.000835,0.000711,0.001169,0.000772,0.000556,0.000952,0.000683,0.000619,0.000945,0.000657,0.000909,0.001151,0.001134,0.001383,0.000937,0.000566,0.000756,0.000851,0.000490,0.000849,0.001157,0.000616,0.001052,0.001725,0.001641,0.001833,0.001765,0.001098,0.001360,0.001304,0.001231,0.001698,0.001448,0.000955,0.001276,0.005512,-1.810759,-0.642939,-1.616909,-1.913170,-1.485529,-1.078047,-1.206470,-1.681595,0.176647,-0.122221,-0.175732,0.084498,1.474997,0.088260,-0.080435,0.456405,-0.893968,0.397913,-0.349071,-0.021299,0.517381,0.119958,0.214589,0.870093,-0.473511,-0.752692,-0.606948,0.421745,-1.100794,1.034914,1.029698,1.411149,1.182655,0.668140,2.899171,1.381887,0.268797,0.643166,1.085140,-0.292244,-0.025656,1.514309,0.374594,-0.150403,-0.410429,-0.676491,0.928507,-0.743544,-1.848671,-0.366632,0.265035,1.016509,0.999026,0.001076,0.005406,0.0,0.0,0.0,1501.512596,7.366304e+04,1639.820037,9.210175e+04,1468.632159,7.263273e+04,1651.174316,9.374026e+04
127017,10200932,0.999844,0.988647,0.994805,0.995871,0.991074,0.990669,0.985551,0.989769,1.079894,1.073983,0.985680,0.994171,0.991831,0.993619,0.994122,0.980178,0.998496,0.994063,0.993191,0.993628,1.045845,1.045759,1.044953,0.996262,0.995436,1.003800,1.004784,0.986505,1.002992,1.002981,1.007452,0.996880,0.998837,1.000000,1.015508,1.002808,0.998316,1.008658,1.002215,0.996116,0.999933,1.000067,1.003824,0.975866,0.985646,0.965453,1.001684,1.003248,1.009377,1.001504,1.003482,1.006346,0.006784,0.005287,0.006286,0.003462,0.000727,0.000707,0.000762,0.000618,0.000821,0.000832,0.000852,0.000563,0.001218,0.000908,0.001337,0.001177,0.001321,0.001189,0.001154,0.001147,0.001778,0.000614,0.000510,0.001221,0.000689,0.000466,0.000778,0.000824,0.000533,0.000865,0.000913,0.000862,0.000944,0.000999,0.001907,0.000482,0.000406,0.000616,0.000875,0.000756,0.000708,0.001122,0.000725,0.000870,0.001543,0.002001,0.002351,0.000734,0.000801,0.001204,0.001436,0.001179,0.001326,0.001662,0.001241,0.001358,0.001259,0.556040,0.334436,0.202385,1.459000,0.338175,-0.018176,0.063661,0.085502,-1.110023,-0.893059,0.118753,inf,-2.149699,-0.912391,-0.692152,-1.579675,-0.697204,0.263348,0.667419,1.008351,-0.052163,-1.538789,-0.567425,0.542004,-1.784036,-1.363922,-0.770858,-0.665685,-0.761711,-0.489040,0.273181,0.440435,-0.880964,0.880164,1.868677,0.213574,-0.000669,0.666914,1.076523,0.917939,-0.681756,-0.189828,-0.948151,0.635073,1.839879,1.439177,-1.949330,0.955310,-0.103657,1.538568,0.561359,1.854516,1.000066,0.001278,0.000000,1.0,0.0,0.0,728.377567,2.259945e+04,811.898041,3.277920e+04,743.465255,2.009927e+04,775.322439,3.297461e+04


In [31]:
# Mean of the sine-curve L2 fit error over all rows of df.
df['curveErrorL2'].mean()

491.52603003761783

In [None]:
# Visualize plots
#
# Print the size of the selected class and plot its first N_PREVIEW stars.

N_PREVIEW = 10  # number of light curves to draw

data = non_var()
# Optional filter: keep only the stars that are not flagged as EBs.
# data = data[data['eb'] == 0]
print(len(data))

# Take the first N_PREVIEW ids directly rather than iterating over every row
# with a manual counter.
for kic in data['KIC'].head(N_PREVIEW):
    visualize(kic)

In [58]:
# Add two-character flags that indicate each type of variability
# st = short term, mt = medium term, lt = long term, eb = eclipsing binary, nv = non variable
#
# The classes are applied in the order listed, so a KIC that appears in more
# than one class ends up with the label of the last class that contains it.
# NOTE(review): confirm that this precedence (nv last) is intended.

# .copy() makes `save` an independent frame, so adding a column does not
# write into a slice of df.
save = df[['KIC']].copy()
save['vt'] = None

funcs = [short_term(), mid_term(), long_term(), eb(), non_var()]
labels = ['st', 'mt', 'lt', 'eb', 'nv']

for label, flag in zip(labels, funcs):
    # One vectorized membership test per class replaces a full-frame scan
    # per KIC. Every KIC in `save` comes from df, so the former inner merge
    # with df is implied by the isin() test.
    save.loc[save['KIC'].isin(flag['KIC']), 'vt'] = label

  0%|          | 0/8737 [00:00<?, ?it/s]

  0%|          | 0/5147 [00:00<?, ?it/s]

  0%|          | 0/11389 [00:00<?, ?it/s]

  0%|          | 0/1870 [00:00<?, ?it/s]

  0%|          | 0/100010 [00:00<?, ?it/s]

In [60]:
# Write the KIC / variability-type ('vt') table to disk without the index column.
save.to_csv("../output/labels.csv", index=False)