In [None]:
# Data root, relative to this notebook. The trailing slash matters: the
# sub-directory paths below are built by plain string joining.
DATA_PATH = '../data/'
# Input light curves and output feature tables live in sibling folders.
LIGHTCURVES_PATH = ''.join((DATA_PATH, 'lightcurves/'))
FEATURES_PATH = ''.join((DATA_PATH, 'features/'))

In [None]:
import numpy as np
import pandas as pd
import measurements, extract
import matplotlib.pyplot as plt

In [None]:
# Seed NumPy's global RNG so the random non-transient subsample drawn later
# with np.random.choice is repeatable from a fresh kernel.
np.random.seed(seed=42)

Import transient lightcurves

In [None]:
# Build the path of the pickled transient light curves
indir = LIGHTCURVES_PATH
filename = 'transient_lightcurves.pickle'
filepath = ''.join((indir, filename))
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
df_tra = pd.read_pickle(filepath)
# Display (rows, columns) of the loaded table
df_tra.shape

Filter transient lightcurves

In [None]:
# Minimum number of observations a light curve must have to be kept.
# The same threshold is applied to the transient and the non-transient tables
# in their respective filtering cells.
min_obs = 10

In [None]:
# Blended observations show up as repeated (TransientID, MJD) pairs; keep the first of each
df_tra = df_tra.drop_duplicates(subset=['TransientID', 'MJD'], keep='first')
# Two-column table: TransientID -> number of non-null Mag values for that transient
df_count = (
    df_tra.groupby('TransientID')['Mag']
    .count()
    .reset_index(name='ObsCount')
)
# Attach the per-object count to every observation row (joins on the shared columns)
df_tra = df_tra.merge(df_count, how='inner')
# Keep only transients with at least min_obs observations
df_tra = df_tra[df_tra.ObsCount >= min_obs]

Import non-transient light curves

In [None]:
# Build the path of the pickled non-transient light curves
indir = LIGHTCURVES_PATH
filename = 'nontransient_lightcurves.pickle'
filepath = ''.join((indir, filename))
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
df_nont = pd.read_pickle(filepath)
# Display (rows, columns) of the loaded table
df_nont.shape

Filter non-transient lightcurves

In [None]:
# Blended observations show up as repeated (ID, MJD) pairs; keep the first of each
df_nont = df_nont.drop_duplicates(subset=['ID', 'MJD'], keep='first')
# Two-column table: ID -> number of non-null Mag values for that object
df_count = (
    df_nont.groupby('ID')['Mag']
    .count()
    .reset_index(name='ObsCount')
)
# Attach the per-object count to every observation row (joins on the shared columns)
df_nont = df_nont.merge(df_count, how='inner')
# Keep only non-transient objects with at least min_obs observations
df_nont = df_nont[df_nont.ObsCount >= min_obs]
# Display (rows, columns) after filtering
df_nont.shape

In [None]:
# Draw a random subset of non-transient objects equal in number to the transient objects
sample_size = len(df_tra.TransientID.unique())
# Sampling is without replacement and uses NumPy's global RNG state
IDs = np.random.choice(df_nont.ID.unique(), size=sample_size, replace=False)
# Keep only the observation rows that belong to a sampled object
df_nont = df_nont[df_nont.ID.isin(IDs)]
# Display the number of sampled objects and the shape of the reduced table
df_nont.ID.unique().shape, df_nont.shape

Feature dict creation method

In [None]:
def feature_dict(num_features=21):
    """Return an empty accumulator mapping each feature name to a new list.

    The result always contains the key 'ID' plus the 19 base feature names
    listed below. Larger values of ``num_features`` add the polynomial-fit
    coefficient names.

    Args:
        num_features: Requested feature count. Values above 21 add the six
            ``poly1_*``/``poly2_*``/``poly3_*`` names; values above 27 also
            add the four ``poly4_*`` names.

    Returns:
        dict: ``{name: []}`` in declaration order, with a distinct empty list
        per key so that appending to one column never touches another.
    """
    features = [
        'ID', 'skew', 'std', 'kurtosis', 'beyond1st', 'stetson_j', 'stetson_k', 'max_slope',
        'amplitude', 'median_absolute_deviation', 'median_buffer_range_percentage', 'pair_slope_trend',
        'flux_percentile_ratio_mid20', 'flux_percentile_ratio_mid35', 'flux_percentile_ratio_mid50',
        'flux_percentile_ratio_mid65', 'flux_percentile_ratio_mid80', 'percent_amplitude',
        'percent_difference_flux_percentile', 'linear_trend',
    ]
    # extend(), not append(): append() would add the whole list as a single
    # element, and a list cannot be used as a dict key below.
    if num_features > 21:
        features.extend(['poly1_a', 'poly2_a', 'poly2_b', 'poly3_a', 'poly3_b', 'poly3_c'])
    if num_features > 27:
        features.extend(['poly4_a', 'poly4_b', 'poly4_c', 'poly4_d'])
    return {k: [] for k in features}

In [None]:
# Define number of features to be extracted.
# This value is passed to feature_dict(), whose branches for values above 21
# and above 27 select the additional poly* feature names.
num_features = 21

Extract transient features

In [None]:
# Empty accumulator: feature name -> list of per-object values
tran_feats = feature_dict(num_features)
for trID in df_tra.TransientID.unique():
    # Observation rows of the current transient
    df = df_tra[df_tra.TransientID == trID]
    # NOTE(review): the second argument is the feature_dict *function* itself, not
    # tran_feats or num_features; confirm this is what extract.features expects.
    obj_feats = extract.features(df, feature_dict)
    # Copy every extracted value into its column; 'ID' is filled in separately below
    for k in tran_feats:
        if k == 'ID':
            continue
        tran_feats[k].append(obj_feats[k])
    tran_feats['ID'].append(trID)
# One row per transient, one column per accumulator key
df_feat_tran = pd.DataFrame(tran_feats)

Count non-null values per feature

In [None]:
# Number of non-null entries in each column of the transient feature table
df_feat_tran.count()

Count distinct values per feature

In [None]:
# Number of distinct non-null values in each column. DataFrame.nunique works
# column by column, so no transpose is needed; transposing a mixed-dtype table
# forces every value into one common dtype first.
df_feat_tran.nunique()

Save transient features

In [None]:
# Number of feature columns actually produced (every column except 'ID')
num_features = df_feat_tran.shape[1] - 1
outdir = FEATURES_PATH
# Named fields keep each count next to its label. The previous positional call
# passed (min_obs, num_features), which put the observation threshold in the
# "feats" slot and the feature count in the "obs" slot.
# NOTE(review): this changes the output filename; update any reader that
# relied on the old, mislabeled name.
filename = 'transient_{n_feats}feats_{n_obs}obs.pickle'.format(n_feats=num_features, n_obs=min_obs)
outpath = outdir + filename
df_feat_tran.to_pickle(outpath)

Extract nontransient features

In [None]:
# Empty accumulator: feature name -> list of per-object values
nontran_feats = feature_dict(num_features)
for ID in df_nont.ID.unique():
    # Observation rows of the current non-transient object
    df = df_nont[df_nont.ID == ID]
    # NOTE(review): the second argument is the feature_dict *function* itself, not
    # nontran_feats or num_features; confirm this is what extract.features expects.
    obj_feats = extract.features(df, feature_dict)
    # Copy every extracted value into its column; 'ID' is filled in separately below
    for k in nontran_feats:
        if k != 'ID':
            nontran_feats[k].append(obj_feats[k])
    # Record this object's own ID. The previous code appended `trID`, the loop
    # variable left over from the transient extraction loop, so every row
    # received the same (transient) identifier.
    nontran_feats['ID'].append(ID)
# One row per non-transient object, one column per accumulator key
df_feat_nontran = pd.DataFrame(nontran_feats)

Count non-null values per feature

In [None]:
# Number of non-null entries in each column of the non-transient feature table
df_feat_nontran.count()

Count distinct values per feature

In [None]:
# Number of distinct non-null values in each column. DataFrame.nunique works
# column by column, so no transpose is needed; transposing a mixed-dtype table
# forces every value into one common dtype first.
df_feat_nontran.nunique()

Save nontransient features

In [None]:
# Number of feature columns actually produced (every column except 'ID')
num_features = df_feat_nontran.shape[1] - 1
outdir = FEATURES_PATH
# Named fields keep each count next to its label. The previous positional call
# passed (min_obs, num_features), which put the observation threshold in the
# "feats" slot and the feature count in the "obs" slot. The underscore after
# "nontransient" matches the 'transient_...' naming used for the transient file.
# NOTE(review): this changes the output filename; update any reader that
# relied on the old, mislabeled name.
filename = 'nontransient_{n_feats}feats_{n_obs}obs.pickle'.format(n_feats=num_features, n_obs=min_obs)
outpath = outdir + filename
df_feat_nontran.to_pickle(outpath)