In [1]:
# Base directory (relative path) that the lightcurve pickles are read from
# and the feature pickles are written to.
DATA_PATH = "../../data/CRTS2/"

In [4]:
# `sys` is imported here because this cell runs before the main import cell;
# without it a fresh kernel (Restart & Run All) fails with NameError.
import sys

# Extend the module search path two directories up
# (presumably so the project-local `measurements` module can be imported -- confirm).
sys.path.append("../..")

In [5]:
import pandas as pd
import sys
import numpy as np
import measurements
import astropy.time as astime

Import transient lightcurves

In [6]:
# Read the pickled transient lightcurves from DATA_PATH and display the
# dimensions of the resulting frame.
indir = DATA_PATH
filename = 'transient_lightcurves.pickle'
filepath = "".join((indir, filename))
df_tra = pd.read_pickle(filepath)
df_tra.shape

(451474, 4)

Filter transient lightcurves

In [7]:
# Drop blended observations, i.e. rows repeating a (TransientID, MJD) pair;
# the first row of each pair is kept.
df_tra = df_tra.drop_duplicates(
    subset=['TransientID', 'MJD'],
    keep='first',
)

In [8]:
# Attach to every row the number of (non-null Mag) observations of its transient.
df_count = (
    df_tra.groupby('TransientID', as_index=False)
    .count()
    .assign(ObsCount=lambda counts: counts['Mag'])
    .loc[:, ['TransientID', 'ObsCount']]
)
# No explicit `on`: the merge joins on the columns the two frames share.
df_tra = df_tra.merge(df_count, how='inner')

In [9]:
# Keep only transients with at least 5 observations
df_tra = df_tra.loc[df_tra['ObsCount'] >= 5]

Import permanent lightcurves

In [96]:
# Read the pickled permanent-source lightcurves from DATA_PATH and display
# the dimensions of the resulting frame.
indir = DATA_PATH
filename = 'permanent_lightcurves.pickle'
filepath = "".join((indir, filename))
df_per = pd.read_pickle(filepath)
df_per.shape

(1924409, 4)

In [97]:
# Drop blended observations, i.e. rows repeating an (ID, MJD) pair;
# the first row of each pair is kept.
df_per = df_per.drop_duplicates(
    subset=['ID', 'MJD'],
    keep='first',
)
df_per.shape

(1802695, 4)

In [98]:
# Attach to every row the number of (non-null Mag) observations of its source.
df_count = (
    df_per.groupby('ID', as_index=False)
    .count()
    .assign(ObsCount=lambda counts: counts['Mag'])
    .loc[:, ['ID', 'ObsCount']]
)
# No explicit `on`: the merge joins on the columns the two frames share.
df_per = df_per.merge(df_count, how='inner')

In [99]:
# Keep only permanent sources with at least 5 observations
df_per = df_per.loc[df_per['ObsCount'] >= 5]
df_per.shape

(1798465, 5)

In [100]:
# Number of distinct permanent sources left after filtering
df_per['ID'].unique().shape

(15193,)

In [105]:
# Sample a subset of permanent sources of the same size as the transient set.
# The draw uses a dedicated, seeded generator so that the selected IDs (and
# therefore the saved permanent features) are reproducible across kernel restarts.
RANDOM_SEED = 42
rng = np.random.RandomState(RANDOM_SEED)
sample_size = df_tra.TransientID.unique().shape[0]
# replace=False: every sampled ID is distinct
IDs = rng.choice(df_per.ID.unique(), size=sample_size, replace=False)
df_per = df_per[df_per.ID.isin(IDs)]
df_per.shape

(501967, 5)

Define the feature-extraction function

In [106]:
def extract_features(df, feature_dict):
    """Compute the lightcurve features of one object and append them to ``feature_dict``.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations of a single object. The columns ``Mag``, ``Magerr`` and
        ``MJD`` are read. The caller's frame is not modified; a copy is used.
    feature_dict : dict
        Maps each feature name to a list. Exactly one value per feature is
        appended in place. Nothing is returned.

    Raises
    ------
    KeyError
        If a plain ``dict`` lacks one of the feature names. The lookup happens
        before any value is computed or appended.

    Notes
    -----
    All features are computed first and only then appended. An exception
    raised by a ``measurements`` routine therefore leaves ``feature_dict``
    untouched instead of leaving lists of unequal length.
    """
    # (feature name, input columns). The name is both the key in feature_dict
    # and the name of the function in the `measurements` module.
    feature_specs = (
        ('skew', ('Mag',)),
        ('kurtosis', ('Mag',)),
        ('std', ('Mag',)),
        ('beyond1st', ('Mag', 'Magerr')),
        ('stetson_j', ('Mag', 'Magerr', 'Date')),
        ('stetson_k', ('Mag', 'Magerr')),
        ('max_slope', ('Mag', 'Date')),
        ('amplitude', ('Mag',)),
        ('median_absolute_deviation', ('Mag',)),
        ('median_buffer_range_percentage', ('Flux',)),
        ('pair_slope_trend', ('Mag', 'Date')),
        ('flux_percentile_ratio_mid20', ('Flux',)),
        ('flux_percentile_ratio_mid35', ('Flux',)),
        ('flux_percentile_ratio_mid50', ('Flux',)),
        ('flux_percentile_ratio_mid65', ('Flux',)),
        ('flux_percentile_ratio_mid80', ('Flux',)),
        ('percent_amplitude', ('Flux',)),
        ('percent_difference_flux_percentile', ('Flux',)),
        ('linear_trend', ('Flux', 'Date')),
    )

    # Resolve every destination list up front so a missing key fails early,
    # before any list has been touched.
    targets = [feature_dict[name] for name, _ in feature_specs]

    # Work on a copy: the derived columns below must not leak into the caller's frame.
    df = df.copy()
    df['Flux'] = measurements.__mag_to_flux__(df.Mag)
    # MJD values converted to datetime objects through astropy's Time
    df['Date'] = astime.Time(df.MJD, format='mjd').datetime
    df = df.sort_values('Date')

    # Compute everything first (same order as the spec table) ...
    values = [
        getattr(measurements, name)(*[df[column] for column in columns])
        for name, columns in feature_specs
    ]

    # ... then commit, so the lists always grow together.
    for target, value in zip(targets, values):
        target.append(value)

Extract transient features

In [12]:
# One empty list per output column; extract_features appends one value per
# transient to every feature list, and the loop below fills 'ID'.
feature_dict = {
    'ID': [],
    'skew': [],
    'std': [],
    'kurtosis': [],
    'beyond1st': [],
    'stetson_j': [],
    'stetson_k': [],
    'max_slope': [],
    'amplitude': [],
    'median_absolute_deviation': [],
    'median_buffer_range_percentage': [],
    'pair_slope_trend': [],
    'flux_percentile_ratio_mid20': [],
    'flux_percentile_ratio_mid35': [],
    'flux_percentile_ratio_mid50': [],
    'flux_percentile_ratio_mid65': [],
    'flux_percentile_ratio_mid80': [],
    'percent_amplitude': [],
    'percent_difference_flux_percentile': [],
    'linear_trend': [],
}
# Walk the per-transient groups in a single pass instead of re-filtering the
# whole frame once per ID. sort=False yields the groups in order of first
# appearance, the same order unique() produced.
for trID, df in df_tra.groupby('TransientID', sort=False):
    feature_dict['ID'].append(trID)
    extract_features(df, feature_dict)
df_feat_tran = pd.DataFrame(feature_dict)

In [113]:
# Non-null entries per feature column (sanity check for missing values)
df_feat_tran.count(axis=0)

ID                                    4384
amplitude                             4384
beyond1st                             4384
flux_percentile_ratio_mid20           4384
flux_percentile_ratio_mid35           4384
flux_percentile_ratio_mid50           4384
flux_percentile_ratio_mid65           4384
flux_percentile_ratio_mid80           4384
kurtosis                              4384
linear_trend                          4384
max_slope                             4384
median_absolute_deviation             4384
median_buffer_range_percentage        4384
pair_slope_trend                      4384
percent_amplitude                     4384
percent_difference_flux_percentile    4384
skew                                  4384
std                                   4384
stetson_j                             4384
stetson_k                             4384
dtype: int64

Save transient features

In [114]:
# Write the transient feature table to DATA_PATH as a pickle.
filename = 'transient_features.pickle'
outdir = DATA_PATH
outpath = "".join((outdir, filename))
df_feat_tran.to_pickle(outpath)

Extract permanent features

In [107]:
# One empty list per output column; extract_features appends one value per
# permanent source to every feature list, and the loop below fills 'ID'.
feature_dict = {
    'ID': [],
    'skew': [],
    'std': [],
    'kurtosis': [],
    'beyond1st': [],
    'stetson_j': [],
    'stetson_k': [],
    'max_slope': [],
    'amplitude': [],
    'median_absolute_deviation': [],
    'median_buffer_range_percentage': [],
    'pair_slope_trend': [],
    'flux_percentile_ratio_mid20': [],
    'flux_percentile_ratio_mid35': [],
    'flux_percentile_ratio_mid50': [],
    'flux_percentile_ratio_mid65': [],
    'flux_percentile_ratio_mid80': [],
    'percent_amplitude': [],
    'percent_difference_flux_percentile': [],
    'linear_trend': [],
}
# Walk the per-source groups in a single pass instead of re-filtering the
# whole frame once per ID. sort=False yields the groups in order of first
# appearance, the same order unique() produced.
for ID, df in df_per.groupby('ID', sort=False):
    feature_dict['ID'].append(ID)
    extract_features(df, feature_dict)
df_feat_perm = pd.DataFrame(feature_dict)

In [115]:
# Non-null entries per feature column (sanity check for missing values)
df_feat_perm.count(axis=0)

ID                                    4384
amplitude                             4384
beyond1st                             4384
flux_percentile_ratio_mid20           4384
flux_percentile_ratio_mid35           4384
flux_percentile_ratio_mid50           4384
flux_percentile_ratio_mid65           4384
flux_percentile_ratio_mid80           4384
kurtosis                              4384
linear_trend                          4384
max_slope                             4384
median_absolute_deviation             4384
median_buffer_range_percentage        4384
pair_slope_trend                      4384
percent_amplitude                     4384
percent_difference_flux_percentile    4384
skew                                  4384
std                                   4384
stetson_j                             4384
stetson_k                             4384
dtype: int64

Save permanent features

In [116]:
# Write the permanent-source feature table to DATA_PATH as a pickle.
filename = 'permanent_features.pickle'
outdir = DATA_PATH
outpath = "".join((outdir, filename))
df_feat_perm.to_pickle(outpath)