In [None]:
# Root data folder and the two sub-folders derived from it
# (trailing slashes are kept because file names are appended by plain concatenation).
DATA_PATH = '../data/'
LIGHTCURVES_PATH = '{}lightcurves/'.format(DATA_PATH)
FEATURES_PATH = '{}features/'.format(DATA_PATH)

In [None]:
import inputs
import numpy as np
import pandas as pd
import measurements, extract
import matplotlib.pyplot as plt

Import transient lightcurves

In [None]:
# Location of the pickled transient light curves.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
filename = 'transient_lightcurves.pickle'
indir = LIGHTCURVES_PATH
filepath = indir + filename
# Load the frame, then expose the identifier as 'ID' (appended as the last column)
# and discard the original 'TransientID' column.
df_tra = (
    pd.read_pickle(filepath)
    .assign(ID=lambda frame: frame.TransientID)
    .drop('TransientID', axis=1)
)
df_tra.shape

Filter transient lightcurves

In [None]:
# Blended observations show up as repeated (ID, MJD) pairs: keep only the first of each
df_tra = df_tra.drop_duplicates(subset=['ID', 'MJD'], keep='first')
# Per-object observation count = number of non-null 'Mag' values for that ID
df_count = (
    df_tra.groupby('ID', as_index=False)
    .count()
    .assign(ObsCount=lambda counts: counts['Mag'])
    [['ID', 'ObsCount']]
)
# Attach the count to every row of the object (joins on the columns both frames share)
df_tra = df_tra.merge(df_count, how='inner')
# Keep only objects with at least 5 / at least 10 observations
df_tra_5 = df_tra.loc[df_tra['ObsCount'] >= 5]
df_tra_10 = df_tra.loc[df_tra['ObsCount'] >= 10]
df_tra_5.shape, df_tra_10.shape

In [None]:
# Number of distinct transient objects left after each minimum-observation cut
tuple(len(frame['ID'].unique()) for frame in (df_tra_5, df_tra_10))

Import non-transient light curves

In [None]:
# Location of the pickled non-transient light curves.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
filename = 'nontransient_lightcurves.pickle'
indir = LIGHTCURVES_PATH
filepath = indir + filename
df_nont = pd.read_pickle(filepath)
df_nont.shape

In [None]:
# Number of distinct non-transient objects that were loaded
len(df_nont['ID'].unique())

Filter non-transient lightcurves

In [None]:
# Blended observations show up as repeated (ID, MJD) pairs: keep only the first of each
df_nont = df_nont.drop_duplicates(subset=['ID', 'MJD'], keep='first')
# Per-object observation count = number of non-null 'Mag' values for that ID
df_count = (
    df_nont.groupby('ID', as_index=False)
    .count()
    .assign(ObsCount=lambda counts: counts['Mag'])
    [['ID', 'ObsCount']]
)
# Attach the count to every row of the object (joins on the columns both frames share)
df_nont = df_nont.merge(df_count, how='inner')
# Keep only non-transient objects with at least 5 observations
df_nont_5 = df_nont.loc[df_nont['ObsCount'] >= 5]
# Keep only non-transient objects with at least 10 observations
df_nont_10 = df_nont.loc[df_nont['ObsCount'] >= 10]
df_nont_5.shape, df_nont_10.shape

Oversample/balance transient light curves by adding Gaussian noise scaled by the magnitude error

In [None]:
def oversample(df, copies=0, rng=None):
    """Tag every light curve with a CopyID and optionally add noisy replicas.

    The input rows are always returned as copy number 0, with
    ``CopyID = '0-' + ID``. For each ``i`` in ``1..copies`` a replica of the
    whole frame is added with ``CopyID = '<i>-' + ID`` and with ``Mag``
    redrawn from a normal distribution centred on the original ``Mag`` with
    standard deviation ``Magerr``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``ID`` (string-like, since it is
        concatenated with a string prefix), ``Mag`` and ``Magerr``.
        It is not modified.
    copies : int, optional
        Number of noisy replicas to add. Values below 1 add none.
    rng : object with a ``normal(loc, scale)`` method, optional
        Random source, e.g. ``np.random.RandomState(seed)`` or
        ``np.random.default_rng(seed)``. Defaults to NumPy's global
        generator (``np.random``).

    Returns
    -------
    pandas.DataFrame
        Original rows followed by the replicas. When replicas are added the
        index is renumbered from 0; otherwise the index of ``df`` is kept.
    """
    sampler = np.random if rng is None else rng
    original = df.copy()
    original['CopyID'] = '0-' + original['ID']
    if copies < 1:
        # Nothing to concatenate: hand back the tagged copy with its index untouched.
        return original
    frames = [original]
    for i in range(1, copies + 1):
        noisy = df.copy()
        noisy['CopyID'] = '{}-'.format(i) + noisy['ID']
        # Noise is always drawn around the *original* magnitudes, never around a replica.
        noisy['Mag'] = sampler.normal(df.Mag, df.Magerr)
        frames.append(noisy)
    # One concat instead of DataFrame.append in a loop: append was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)

In [None]:
# Oversample Data Frames
# Seed NumPy's global generator (the noise source used by oversample) so that
# Restart & Run All reproduces the same noisy copies and therefore the same features.
OVERSAMPLE_SEED = 42
np.random.seed(OVERSAMPLE_SEED)
# Noisy replicas added per transient; non-transients get none and are only tagged with a CopyID.
N_TRANSIENT_COPIES = 6
N_NONTRANSIENT_COPIES = 0
# NOTE: this cell rebinds its own inputs, so it is not idempotent. Re-running it
# oversamples the already-oversampled transient frames; re-run the filter cells first.
df_tra_5 = oversample(df_tra_5, N_TRANSIENT_COPIES)
df_tra_10 = oversample(df_tra_10, N_TRANSIENT_COPIES)
df_nont_5 = oversample(df_nont_5, N_NONTRANSIENT_COPIES)
df_nont_10 = oversample(df_nont_10, N_NONTRANSIENT_COPIES)

In [None]:
# Shape of the distinct-CopyID array of each oversampled frame, i.e. light curves per data set
tuple(
    frame.CopyID.unique().shape
    for frame in (df_tra_5, df_tra_10, df_nont_5, df_nont_10)
)

Extract features

In [None]:
def extract_features(df_all):
    """Compute one row of features per light curve (per distinct CopyID).

    Parameters
    ----------
    df_all : pandas.DataFrame
        Observations of many light curves; must contain the columns
        ``CopyID`` (groups rows into light curves) and ``ID``.

    Returns
    -------
    pandas.DataFrame
        One row per CopyID, in order of first appearance in ``df_all``.
        Columns are the keys of ``extract.feature_dict(31)`` (including
        ``ID``, taken from the first row of each light curve) plus ``CopyID``.
    """
    # Create empty feature dict: a mapping from column name to a list that is
    # filled below (31 is presumably the number of features -- confirm in `extract`).
    feats_dict = extract.feature_dict(31)
    copy_ids = []
    # A single groupby pass replaces a boolean mask over the whole frame per object.
    # sort=False yields groups in order of first appearance, like CopyID.unique().
    for i, (copy_id, df) in enumerate(df_all.groupby('CopyID', sort=False)):
        if i % 1000 == 0:
            print(i)
        # Get features of the current light curve
        obj_feats = extract.features(df, feats_dict)
        # Append features; 'ID' is not read from obj_feats but from the data itself
        for key in feats_dict:
            if key != 'ID':
                feats_dict[key].append(obj_feats[key])
        feats_dict['ID'].append(df.ID.iloc[0])
        copy_ids.append(copy_id)
    # Create feature dataframe
    feats_dict['CopyID'] = copy_ids
    return pd.DataFrame(feats_dict)

Save features routine

In [None]:
def save_features(df_feats, min_obs, tipe):
    """Pickle the feature table four times, with progressively fewer feature columns.

    The full table is written first; then, cumulatively, the degree-4 polynomial
    columns, the degree-1 to degree-3 polynomial columns, and finally
    'small_kurtosis' and 'pair_slope_trend_last_30' are dropped, writing one file
    after each step. Files go to FEATURES_PATH and are named
    'oversam_<tipe>_<min_obs>obs_<n>feats.pickle', where n is the number of
    columns minus 2 (the two non-feature columns, presumably 'ID' and 'CopyID').

    df_feats itself is not modified. Returns None.
    """
    filename_form = 'oversam_{}_{}obs_{}feats.pickle'
    # Columns removed before each successive save (empty = save everything)
    drop_stages = (
        [],
        ['poly4_a', 'poly4_b', 'poly4_c', 'poly4_d'],
        ['poly1_a', 'poly2_a', 'poly2_b', 'poly3_a', 'poly3_b', 'poly3_c'],
        ['small_kurtosis', 'pair_slope_trend_last_30'],
    )
    remaining = df_feats
    for to_drop in drop_stages:
        if to_drop:
            remaining = remaining.drop(to_drop, axis=1)
        feature_count = remaining.shape[1] - 2
        target = FEATURES_PATH + filename_form.format(tipe, min_obs, feature_count)
        remaining.to_pickle(target)

Routine that generates features: extract them, then save them

In [None]:
def generate_features(df_all, min_obs, transient):
    """Extract the features of ``df_all`` and save them to disk.

    ``min_obs`` is only used to name the output files and in the progress
    message; ``transient`` (truthy/falsy) selects the 'transient' or
    'nontransient' label in the file names. Returns None.
    """
    label = 'transient' if transient else 'nontransient'
    save_features(extract_features(df_all), min_obs, label)
    print('Finished task obs={} transient={}'.format(min_obs, transient))

Generate features

In [None]:
# (frame, minimum observations, is transient) for every feature set to produce, in run order
feature_jobs = (
    (df_nont_10, 10, False),
    (df_tra_10, 10, True),
    (df_nont_5, 5, False),
    (df_tra_5, 5, True),
)
for job_frame, job_min_obs, job_is_transient in feature_jobs:
    generate_features(job_frame, job_min_obs, job_is_transient)