## Imports & setup

In [1]:
import pandas as pd
import requests
import io
import light_curve as lc
from magcvs_library.functions import tqdm2
import warnings

# Silencing FutureWarning messages for the rest of the notebook:
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Column labels for the values returned by the extractor, listed in the same
# order as the features are passed to lc.Extractor:
feature_names = [
    'mean', 'weightedMean', 'std', 'median',
    'amplitude', 'beyond1Std', 'cusum', 'IPR10', 'kurtosis',
    'linT', 'linT_sigma', 'linT_noise',
    'linF_slope', 'linF_slope_sigma', 'linF_chi2',
    'MPR40_5', 'MPR20_10',
    'maxSlope', 'medianAbsDev', 'medianBRP10', 'percentAmplitude',
    'meanVariance', 'andersonDarlingNorm', 'chi2', 'skew', 'stetsonK',
]

## Preparation for feature extraction

In [2]:
# Initializing features with the light_curve library.
# Each object below is one feature evaluator; they are combined into a single
# `extractor` whose outputs are labelled, in order, by `feature_names`.
mean = lc.Mean()
weighted_mean = lc.WeightedMean()
standard_deviation = lc.StandardDeviation()
median = lc.Median()
amplitude = lc.Amplitude()
beyond_1_std = lc.BeyondNStd(nstd=1)
cusum = lc.Cusum()
# The quantile is passed explicitly so that the feature matches its 'IPR10' label
# (the library default for InterPercentileRange is 0.25, i.e. the interquartile range):
inter_percentile_range_10 = lc.InterPercentileRange(quantile=.1)
kurtosis = lc.Kurtosis()
# Multi-valued features: labelled 'linT', 'linT_sigma', 'linT_noise' and
# 'linF_slope', 'linF_slope_sigma', 'linF_chi2' in feature_names.
linear_trend = lc.LinearTrend()
linear_fit_slope = lc.LinearFit()
magnitude_percentage_ratio_40_5 = lc.MagnitudePercentageRatio(quantile_numerator=.4, quantile_denominator=.05)
magnitude_percentage_ratio_20_10 = lc.MagnitudePercentageRatio(quantile_numerator=.2, quantile_denominator=.1)
maximum_slope = lc.MaximumSlope()
median_absolute_deviation = lc.MedianAbsoluteDeviation()
median_buffer_range_percentage_10 = lc.MedianBufferRangePercentage(quantile=.1)
percent_amplitude = lc.PercentAmplitude()
mean_variance = lc.MeanVariance()
anderson_darling_normal = lc.AndersonDarlingNormal()
chi2 = lc.ReducedChi2()
skew = lc.Skew()
stetson_K = lc.StetsonK()

# The order of the arguments must stay aligned with the order of feature_names:
extractor = lc.Extractor(mean, weighted_mean, standard_deviation, median, amplitude, beyond_1_std,
                         cusum, inter_percentile_range_10, kurtosis, linear_trend, linear_fit_slope,
                         magnitude_percentage_ratio_40_5, magnitude_percentage_ratio_20_10, maximum_slope,
                         median_absolute_deviation, median_buffer_range_percentage_10, percent_amplitude,
                         mean_variance, anderson_darling_normal, chi2, skew, stetson_K)

### Extracting features from positive class objects

In [3]:
# Reading IDs of selected magnetic Cataclysmic Variables:
objids_list = list(pd.read_csv('../data/magnetic_cvs_objectId.csv').values.flatten())

# Columns shared by the two positive-class feature tables (g and r filter):
positive_columns = ['objectId', 'time_range (yr)', 'nb_of_points', *feature_names]


def _fetch_alerts(object_id, timeout=60):
    """Download the alerts of one object from the Fink API, sorted by ascending julian date.

    Parameters
    ----------
    object_id
        Identifier sent as "objectId" in the request.
    timeout : float
        Seconds to wait for the server before `requests` gives up.

    Returns
    -------
    pandas.DataFrame
        The alerts sorted by 'i:jd', or an empty frame when the API returns no row.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    response = requests.post("https://api.fink-portal.org/api/v1/objects",
                             json={"objectId": object_id,
                                   "columns": "i:objectId,d:anomaly_score,i:jd,i:magpsf,i:sigmapsf,i:fid",
                                   "output-format": "json"
                                   },
                             timeout=timeout)
    # Failing loudly here is clearer than a parsing error on an error payload:
    response.raise_for_status()
    pdf = pd.read_json(io.BytesIO(response.content))
    if pdf.empty:
        # Nothing to sort (and no 'i:jd' column to sort by).
        return pdf
    # Sorting by ascending julian date for the extractor. (Output is descending)
    return pdf.sort_values(by='i:jd')


def _band_record(pdf, fid, object_id, min_points=4):
    """Build the feature row of one object in one filter.

    Parameters
    ----------
    pdf : pandas.DataFrame
        Alerts of the object, sorted by ascending 'i:jd'.
    fid : int
        Value of 'i:fid' to select (1 for the g filter, 2 for the r filter).
    object_id
        Identifier stored in the 'objectId' column of the row.
    min_points : int
        Minimum number of data points; the extractor does not accept less than 4.

    Returns
    -------
    dict or None
        Mapping from `positive_columns` to values, or None when the filter
        has fewer than `min_points` data points.
    """
    band = pdf[pdf['i:fid'] == fid]
    if len(band) < min_points:
        return None
    t = band['i:jd'].values
    m = band['i:magpsf'].values
    m_err = band['i:sigmapsf'].values
    features = extractor(t, m, m_err, sorted=True, check=False)
    time_range_yr = round((t.max() - t.min()) / 365, 1)
    return dict(zip(positive_columns, [object_id, time_range_yr, len(t), *features]))


# Computing the feature data from each object one at a time. Rows are collected in
# plain lists and turned into dataframes once, instead of growing a dataframe with
# pd.concat at every iteration:
records_g, records_r = [], []
for object_id in tqdm2(objids_list):
    # Getting the data from the current object with fink api:
    pdf = _fetch_alerts(object_id)
    if pdf.empty:
        continue
    # fid 1 is the g filter, fid 2 is the r filter:
    for fid, records in ((1, records_g), (2, records_r)):
        record = _band_record(pdf, fid, object_id)
        if record is not None:
            records.append(record)

# The two dataframes containing the feature data from the positive class in g and r filter:
positive_g = pd.DataFrame(records_g, columns=positive_columns)
positive_r = pd.DataFrame(records_r, columns=positive_columns)

100%|██████████| 83/83


In [4]:
# Displaying the feature table of the positive class in the g filter:
positive_g

Unnamed: 0,objectId,time_range (yr),nb_of_points,mean,weightedMean,std,median,amplitude,beyond1Std,cusum,...,MPR20_10,maxSlope,medianAbsDev,medianBRP10,percentAmplitude,meanVariance,andersonDarlingNorm,chi2,skew,stetsonK
0,ZTF17aaaadkj,5.2,99,19.351664,17.926171,0.773830,19.489305,2.169440,0.131313,0.113724,...,0.550302,11.136091,0.274799,0.424242,3.533662,0.039988,8.242957,258.384164,-2.632410,0.801240
1,ZTF17aaaehqt,5.2,454,18.985397,18.832553,0.378133,18.948875,0.988656,0.352423,0.094140,...,0.703026,2010.332470,0.265202,0.182819,1.028902,0.019917,2.215873,10.960012,0.366190,0.820527
2,ZTF17aaaikoz,5.4,467,16.244120,15.894351,0.778605,16.195093,2.373892,0.250535,0.185127,...,0.429754,2878.817933,0.281420,0.438972,2.606182,0.047931,10.835068,418.063197,0.084874,0.717832
3,ZTF17aaaitci,5.3,113,18.867676,18.270141,0.717050,18.993267,1.314201,0.380531,0.109749,...,0.726915,1.157680,0.468649,0.159292,1.428190,0.038004,3.392253,70.435940,-0.521350,0.920601
4,ZTF17aaaizfe,5.2,103,19.256734,18.915205,0.512080,19.354939,1.105764,0.368932,0.203493,...,0.678470,32.618515,0.344204,0.155340,1.332104,0.026592,0.953488,25.402100,-0.478222,0.813392
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,ZTF19aapzvoj,4.9,75,18.657257,18.418842,0.541252,18.529068,1.107543,0.320000,0.198619,...,0.831988,0.651039,0.327034,0.120000,1.349852,0.029010,1.867957,22.306503,0.527838,0.827795
74,ZTF19acjnxyl,2.7,4,16.806402,16.732666,0.290126,16.795084,0.292496,0.250000,0.425347,...,0.906282,0.001187,0.235491,0.000000,0.315132,0.017263,0.149260,13.795454,0.083616,0.974089
75,ZTF20aagtjlb,0.5,4,17.327740,17.271849,0.108683,17.335215,0.128086,0.500000,0.311825,...,0.781150,0.007747,0.073891,0.000000,0.143036,0.006272,0.071466,2.411835,-0.354159,0.919011
76,ZTF20abragvw,0.3,47,18.229926,17.828014,0.709194,18.186832,1.556399,0.319149,0.380666,...,0.632405,1.042656,0.505884,0.063830,1.995860,0.038903,0.513651,86.216788,0.318086,0.830296


In [5]:
# Displaying the feature table of the positive class in the r filter:
positive_r

Unnamed: 0,objectId,time_range (yr),nb_of_points,mean,weightedMean,std,median,amplitude,beyond1Std,cusum,...,MPR20_10,maxSlope,medianAbsDev,medianBRP10,percentAmplitude,meanVariance,andersonDarlingNorm,chi2,skew,stetsonK
0,ZTF17aaaadkj,5.3,148,18.498460,17.911223,0.673640,18.567179,2.033953,0.168919,0.166855,...,0.633670,778.715272,0.309649,0.344595,2.969118,0.036416,7.131940,214.456210,-2.027313,0.690412
1,ZTF17aaaehqt,5.2,561,18.737848,18.609301,0.345891,18.690475,1.125522,0.326203,0.092303,...,0.686718,1526.405939,0.235197,0.256684,1.213909,0.018459,2.423991,10.407711,0.341428,0.781282
2,ZTF17aaaikoz,5.4,514,16.294597,15.948891,0.659364,16.217785,1.812482,0.311284,0.220782,...,0.489237,1696.565095,0.303348,0.344358,2.006298,0.040465,5.754310,250.015226,-0.053725,0.753322
3,ZTF17aaaitci,5.3,172,18.246411,17.824114,0.617521,18.285947,1.371412,0.383721,0.067845,...,0.711236,27.506142,0.469796,0.180233,1.582166,0.033843,0.789817,63.555108,-0.011291,0.879793
4,ZTF17aaaizfe,5.3,131,18.978965,18.805495,0.453919,18.996767,1.220155,0.320611,0.195346,...,0.672887,118.135450,0.305319,0.221374,1.524630,0.023917,0.365490,16.356019,0.421616,0.826831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,ZTF19aadoqcb,5.1,81,17.810478,17.467469,0.587141,17.896920,1.287127,0.308642,0.182642,...,0.551671,53.530370,0.350085,0.259259,1.603844,0.032966,1.015659,72.008398,-0.630956,0.792517
74,ZTF19aapzvoj,5.0,97,18.457563,18.072654,0.561879,18.422853,1.079992,0.360825,0.180805,...,0.676205,15.926112,0.425125,0.082474,1.275664,0.030442,0.885812,31.932811,0.226339,0.857616
75,ZTF19acjnxyl,2.2,4,16.239316,16.132951,0.495491,16.418932,0.527661,0.250000,0.356857,...,0.790463,0.050003,0.163771,0.000000,0.886893,0.030512,0.186936,97.825885,-1.494879,0.893288
76,ZTF20abragvw,0.4,46,17.691656,17.305729,0.813959,17.444023,1.535172,0.217391,0.380809,...,0.548809,1.062906,0.467844,0.195652,2.246740,0.046008,1.662995,140.841250,1.069200,0.837161
