In [1]:
import os 
import numpy as np
import pandas as pd
import pickle 
from astropy.table import Table

In [2]:
# Directory that holds the kilonova HEAD/PHOT FITS tables.
# The default is the original cluster location; set the KNDETECT_DATASET_DIR
# environment variable to run the notebook against a copy stored elsewhere.
kn_dataset_dir = os.environ.get(
    "KNDETECT_DATASET_DIR",
    "/sps/lsst/users/bbiswas/data/kilonova_datasets",
)

# HEAD and PHOT files for the training and test samples.
train_data_head_path = os.path.join(kn_dataset_dir, "train_final_master_HEAD.FITS")
train_data_phot_path = os.path.join(kn_dataset_dir, "train_final_master_PHOT.FITS")
test_data_head_path = os.path.join(kn_dataset_dir, "test_final_master_HEAD.FITS")
test_data_phot_path = os.path.join(kn_dataset_dir, "test_final_master_PHOT.FITS")

In [3]:
# Read the training HEAD table first, then the PHOT table, converting each
# FITS table into a pandas DataFrame.
df_header, df_phot = (
    Table.read(fits_path, format='fits').to_pandas()
    for fits_path in (train_data_head_path, train_data_phot_path)
)

In [4]:
from kndetect.utils import load_pcs, get_event_type, get_data_dir_path
from kndetect.features import extract_features_all_lightcurves
from kndetect.training import append_y_true_col

In [5]:
# `pcs` is passed to the feature extraction below; `data_dir` is the directory
# where the feature CSVs and the pickled classifier are read and written.
pcs, data_dir = load_pcs(), get_data_dir_path()

In [6]:
# When True, the train and test feature cells read the previously saved
# train_features.csv / test_features.csv from `data_dir` instead of
# re-extracting the features from the photometry tables.
use_already_trained_features = True

In [7]:
# Build the training feature table: either recompute it from the photometry
# or reuse the CSV saved by an earlier run.
train_features_csv = os.path.join(data_dir, "train_features.csv")

if not use_already_trained_features:
    # Extract per-light-curve features keyed on SNID for the g and r bands
    # (band labels are given as bytes).
    extracted_train_features = extract_features_all_lightcurves(
        df_phot,
        "SNID",
        pcs,
        [b'g', b'r'],
    )
    # Add the y_true label column; types 149/150/151 are the ones labelled
    # "KN" in the displayed feature tables.
    train_features_df = append_y_true_col(
        features_df=extracted_train_features,
        prediction_type_nos=[149, 150, 151],
        meta_df=df_header,
        meta_key_col_name="SNID",
        meta_type_col_name="SNTYPE",
    )
else:
    # The first CSV column is the saved DataFrame index.
    train_features_df = pd.read_csv(train_features_csv, index_col=0)

In [8]:
# Show the training feature table as the cell output.
train_features_df

Unnamed: 0,coeff1_g,coeff2_g,coeff3_g,residuo_g,maxflux_g,coeff1_r,coeff2_r,coeff3_r,residuo_r,maxflux_r,key,type,type_names,y_true,y_pred,y_score
0,-1.445163e-09,2.607948e-03,8.730999e-01,0.885027,230.670456,-6.113288e-09,5.740579e-10,8.306561e-01,1.486240,373.748047,1757,150,150: KN GW170817,True,True,0.844004
1,3.548456e-01,-6.361100e-09,7.598077e-01,1.038731,427.853790,7.659619e-01,-5.251119e-09,2.726788e-01,2.054284,788.683228,6415,141,141: 91BG,False,False,0.000000
2,9.648955e-01,2.575183e-01,-1.083929e-10,0.941676,270.629425,8.970144e-01,2.048207e-11,9.012189e-02,1.150067,518.246521,7707,103,103: Core collapse Type Ibc,False,False,0.000000
3,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,3.124574e-03,-1.831540e-12,1.097911e+00,0.865141,534.019775,8267,151,151: KN Karsen 2017,True,True,0.922533
4,4.132821e-01,-5.569791e-09,6.755659e-01,1.064655,263.286224,3.814125e-01,-2.046229e-01,6.647399e-01,1.000773,291.099426,12578,102,102: MOSFIT-Ibc,False,False,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22275,1.018560e+00,-6.214810e-10,3.294138e-08,1.756306,888.318420,1.048414e+00,8.001244e-01,-2.148909e-09,2.026614,666.974243,137062467,112,112: Core collapse Type II,False,False,0.000000
22276,8.376595e-01,-1.307256e-09,1.249076e-01,1.517219,1141.677979,9.648917e-01,3.121802e-03,-1.641931e-11,1.321579,1213.934937,137071784,170,170: AGN,False,False,0.000000
22277,7.701429e-01,7.291078e-13,2.541589e-01,1.195978,295.870941,8.503450e-01,-6.111624e-10,1.231172e-01,0.745460,408.148193,137071978,143,143: Iax,False,False,0.000000
22278,-5.448147e-09,6.498963e-02,7.854022e-01,1.440381,525.301697,3.611119e-11,1.234066e-02,1.524088e+00,0.950910,445.314880,137079473,151,151: KN Karsen 2017,True,True,0.845227


In [9]:
# Persist the training features so later runs can load them from `data_dir`.
train_features_out_path = os.path.join(data_dir, "train_features.csv")
train_features_df.to_csv(train_features_out_path)

# Train the classifier

In [10]:
from kndetect.training import train_classifier

In [11]:
# Fit the classifier on the training feature table; the helper returns the
# fitted classifier together with a features DataFrame.
# NOTE(review): the cached table displayed above already contains y_pred and
# y_score columns from an earlier run — confirm train_classifier ignores or
# overwrites them rather than using them as inputs.
clf, features_df = train_classifier(train_features_df)

In [12]:
# Serialize the trained classifier to `complete.pkl` inside `data_dir`.
classifier_pickle_path = os.path.join(data_dir, 'complete.pkl')
with open(classifier_pickle_path, 'wb') as files:
    pickle.dump(clf, files)

In [13]:
# Generate test features and predict probabilities

In [14]:
# Same helper call that produced `data_dir` earlier in the notebook.
# NOTE(review): `data_dir_path` is not referenced by any of the visible cells,
# which keep using `data_dir` — confirm whether this cell is still needed.
data_dir_path = get_data_dir_path()

In [15]:
# Read the test HEAD table first, then the PHOT table, converting each FITS
# table into a pandas DataFrame.
df_header_test, df_phot_test = (
    Table.read(fits_path, format='fits').to_pandas()
    for fits_path in (test_data_head_path, test_data_phot_path)
)

In [16]:
# Build the test feature table: either recompute it from the test photometry
# or reuse the CSV saved by an earlier run. The same flag as for the training
# features selects the branch.
test_features_csv = os.path.join(data_dir, "test_features.csv")

if not use_already_trained_features:
    # Extract per-light-curve features keyed on SNID for the g and r bands
    # (band labels are given as bytes).
    extracted_test_features = extract_features_all_lightcurves(
        df_phot_test,
        "SNID",
        pcs,
        [b'g', b'r'],
    )
    # Add the y_true label column; types 149/150/151 are the ones labelled
    # "KN" in the displayed feature tables.
    test_features_df = append_y_true_col(
        features_df=extracted_test_features,
        prediction_type_nos=[149, 150, 151],
        meta_df=df_header_test,
        meta_key_col_name="SNID",
        meta_type_col_name="SNTYPE",
    )
else:
    # The first CSV column is the saved DataFrame index.
    test_features_df = pd.read_csv(test_features_csv, index_col=0)

In [17]:
from kndetect.predict import load_classifier, predict_kn_score

In [18]:
# Reload the classifier by file name.
# NOTE(review): presumably resolved against the same data directory the pickle
# was written to above — confirm in kndetect.predict.load_classifier.
clf1 = load_classifier("complete.pkl")

In [19]:
# Score every test light curve with the reloaded classifier.
# `filtered_indices` is returned alongside the probabilities but is not used
# by the visible cells that follow.
probabilities, filtered_indices = predict_kn_score(clf1, test_features_df)

In [20]:
# Store the second row of the transposed probabilities (i.e. the second
# column of `probabilities`) as the per-event score.
# NOTE(review): presumably column 1 is the kilonova (positive) class
# probability and `probabilities` has one row per row of test_features_df —
# confirm against predict_kn_score.
test_features_df['y_pred_score'] = probabilities.T[1]

In [21]:
# Show the test feature table, including the new y_pred_score column.
test_features_df

Unnamed: 0,coeff1_g,coeff2_g,coeff3_g,residuo_g,maxflux_g,coeff1_r,coeff2_r,coeff3_r,residuo_r,maxflux_r,key,type,type_names,y_true,y_pred_score
0,1.732623e-09,-2.509101e-11,1.097553e+00,1.938472,3813.011456,4.975693e-09,-2.662738e-09,1.250420e+00,4.353821,5518.907806,1612,149,149: KN GRANDMA,True,0.699136
1,1.593189e+00,1.239720e-01,-2.053517e-01,5.750576,1215.978638,1.102308e+00,-3.386281e-09,-8.771612e-09,9.126114,1900.364746,10871,162,162: ILOT,False,0.000000
2,-1.025571e-09,-3.375859e-09,1.044571e+00,1.317318,764.863892,-1.401216e-09,-3.309492e-09,1.104302e+00,0.919006,805.522644,10872,150,150: KN GW170817,True,0.821185
3,-4.666286e-01,-1.631006e-09,1.739795e+00,13.765211,2329.209717,3.035286e-06,1.190303e-09,7.239731e-01,11.834150,1588.494263,11422,180,180: RRLyrae,False,0.066667
4,-2.676815e-01,-6.766489e-10,8.081538e-01,41.273416,20902.640625,-1.497712e-01,2.239404e-01,8.265010e-01,35.546320,11635.100586,13390,180,180: RRLyrae,False,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21283,-3.042166e-09,1.438937e+00,1.222298e+00,4.003843,1325.314697,1.071865e+00,1.785420e-01,1.248190e-09,1.808287,1154.043457,137049400,183,183: PHOEBE,False,0.072095
21284,1.925799e-01,3.392688e-01,6.042987e-01,1.606676,229.678802,-1.666384e-09,-1.315977e-09,8.413959e-01,1.900921,785.573853,137051059,181,M 181: dwarf_flares,False,0.610047
21285,7.285402e-01,-4.506676e-10,2.336910e-01,1.023906,374.421295,1.046572e+00,-1.830059e-09,-1.155196e-08,1.273695,341.236908,137054088,143,143: Iax,False,0.000000
21286,-1.846909e-03,-4.560111e-09,9.970787e-01,1.367441,1000.960266,-2.760333e-09,-3.718945e-09,9.958187e-01,2.048643,3378.102295,137071432,181,M 181: dwarf_flares,False,0.773226


In [22]:
# Persist the test features (including y_pred_score) into `data_dir`.
test_features_out_path = os.path.join(data_dir, "test_features.csv")
test_features_df.to_csv(test_features_out_path)