https://github.com/dwkim78/upsilon

In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, tqdm_notebook 
import numpy as np
from IPython.display import display
from scipy import stats
from collections import OrderedDict
import upsilon

In [2]:
train_ts = pd.read_csv("../data/input/training_set.csv")
display(train_ts.head())

Unnamed: 0,object_id,mjd,passband,flux,flux_err,detected
0,615,59750.4229,2,-544.810303,3.622952,1
1,615,59750.4306,1,-816.434326,5.55337,1
2,615,59750.4383,3,-471.385529,3.801213,1
3,615,59750.445,4,-388.984985,11.395031,1
4,615,59752.407,2,-681.858887,4.041204,1


In [3]:
train_meta = pd.read_csv("../data/input/training_set_metadata.csv")
display(train_meta.head())

Unnamed: 0,object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target
0,615,349.046051,-61.943836,320.79653,-51.753706,1,0.0,0.0,0.0,,0.017,92
1,713,53.085938,-27.784405,223.525509,-54.460748,1,1.8181,1.6267,0.2552,45.4063,0.007,88
2,730,33.574219,-6.579593,170.455585,-61.548219,1,0.232,0.2262,0.0157,40.2561,0.021,42
3,745,0.189873,-45.586655,328.254458,-68.969298,1,0.3037,0.2813,1.1523,40.7951,0.007,90
4,1124,352.711273,-63.823658,316.922299,-51.059403,1,0.1934,0.2415,0.0176,40.4166,0.024,90


In [6]:
sample_data = train_ts.query('object_id == 615 & passband == 5')
date = sample_data["mjd"]
mag = sample_data["flux"]
err = sample_data["flux_err"]

In [9]:
e_features = upsilon.ExtractFeatures(date, mag, err)
e_features.run()
features = e_features.get_features()
features

  % min_n_data)


OrderedDict([('amplitude', 386.304197032748),
             ('cusum', 0.20611636592760962),
             ('eta', 1.6578265891873614),
             ('hl_amp_ratio', 1.3300739250634674),
             ('kurtosis', -1.5599369998179708),
             ('n_points', 57),
             ('period', 0.3244800372208426),
             ('period_SNR', 23.889243076602337),
             ('period_log10FAP', -6.048343754404527),
             ('period_uncertainty', 8.051614322582279e-05),
             ('phase_cusum', 0.2982932499364303),
             ('phase_eta', 0.3547877159445853),
             ('phi21', 2.1748830759803566),
             ('phi31', 3.045055087059272),
             ('quartile31', 611.040497),
             ('r21', 0.044728513618950985),
             ('r31', 0.039216077576528695),
             ('shapiro_w', 0.8778709173202515),
             ('skewness', 0.12194754284259278),
             ('slope_per10', -0.0003577323100311799),
             ('slope_per90', 0.0003078394874940904),
            

In [37]:
import warnings
warnings.filterwarnings("ignore")

result_list = []
for object_id in tqdm_notebook(train_ts.object_id.unique(), ascii=True):
    for passband in train_ts.passband.unique():
        sample_data = train_ts.query('object_id == @object_id & passband == @passband')
        e_features = upsilon.ExtractFeatures(
            sample_data["mjd"], sample_data["flux"], sample_data["flux_err"],
            n_threads=8)
        e_features.run()
        features = e_features.get_features()
        result = [object_id, passband] + list(features.values())
        result_list.append(result)

HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

TypeError: Improper input: N=7 must not exceed M=6

In [38]:
colnames = ['object_id', 'passband'] + list(features.keys())
results = pd.DataFrame(result_list, columns=colnames)
results.set_index(['object_id', 'passband']).unstack()

Unnamed: 0_level_0,amplitude,amplitude,amplitude,amplitude,amplitude,amplitude,cusum,cusum,cusum,cusum,...,weighted_mean,weighted_mean,weighted_mean,weighted_mean,weighted_std,weighted_std,weighted_std,weighted_std,weighted_std,weighted_std
passband,0,1,2,3,4,5,0,1,2,3,...,2,3,4,5,0,1,2,3,4,5
object_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
615,112.605911,882.445501,665.610213,484.054636,409.720682,386.304197,0.332774,0.229922,0.169395,0.176275,...,-117.466416,-111.077439,-54.247713,-54.414901,83.320558,571.205729,427.481004,326.092928,284.092763,289.603730
713,10.692238,7.539003,6.645727,8.576184,6.606117,12.126545,0.449723,0.401591,0.426368,0.420236,...,-0.910688,-1.208522,-1.153880,-1.848958,6.947711,5.794885,5.624774,6.395224,6.295784,6.956560
730,1.418107,1.320970,13.615920,20.830331,28.526381,16.923158,0.090330,0.236672,0.352261,0.308559,...,2.232651,3.129616,4.378210,4.834683,1.725323,1.412148,5.182166,7.845848,10.565612,13.047743
745,5.059816,46.389607,60.421121,64.871561,59.286871,43.880786,0.330675,0.197454,0.271623,0.309900,...,8.164125,13.527096,12.845928,9.389122,3.992333,22.162058,27.235077,33.414665,31.448462,23.081505
1124,1.657526,17.218355,53.725319,73.560465,79.426832,69.781360,0.241814,0.400268,0.366841,0.322017,...,8.946284,9.958763,9.315030,6.299269,2.153460,7.468844,19.512266,24.391502,25.165624,20.418874
1227,4.810036,1.282579,1.166443,1.787652,2.815051,5.708939,0.131503,0.176121,0.165801,0.167085,...,-0.011969,0.233091,0.641350,-0.708505,7.942973,1.560389,1.273535,1.572359,3.497728,5.207080
1598,96.120389,615.946085,542.759685,435.292557,328.485642,289.773088,0.298747,0.450448,0.412750,0.312916,...,9.293078,12.601732,14.716036,17.670026,46.393500,102.311346,98.409180,104.527606,98.954487,100.675297
1632,1.586598,1.249825,1.194724,2.105944,8.397496,23.682428,0.196807,0.246969,0.117077,0.221808,...,0.314951,1.034514,2.838713,6.136958,1.928403,1.446957,1.218639,2.500241,4.462030,9.450990
1920,6.468356,44.375708,66.032552,72.263615,64.236281,48.929428,0.276709,0.307328,0.346380,0.354525,...,20.448799,26.649844,23.661224,19.396574,6.796638,40.243473,52.659318,55.596972,48.928382,39.590224
1926,3.520881,1.436208,4.759795,1.971907,4.067816,13.413141,0.292353,0.114450,0.109741,0.152412,...,1.448782,0.310407,-0.474156,-0.699021,2.701301,1.388044,6.420838,2.495952,3.968269,9.880619
