In [1]:
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tsfresh.feature_selection
from mrmr import mrmr_classif
from sklearn.impute import SimpleImputer

sys.path.append('..')
from util import load_datasets_concat, replace_infinities, load_base_data, write_json

# Notebook-wide settings.
# NOTE(review): SAMPLING_RATE is presumably the signal sampling frequency in Hz;
# nothing in the visible cells reads it — confirm before relying on it.
SAMPLING_RATE = 300

# Default size (width, height in inches) for every matplotlib figure below.
plt.rcParams['figure.figsize'] = [6, 5]

In [26]:
# Feature sets to combine, given as paths relative to `data_path`.
data = [
    "entropy/flipped",
    "global/flipped",
    "global/standardized",
    "interval_derivatives/flipped",
    "tsfresh/stand_flip",
    "hrv/nn",
    "hrv/rr",
    "morph_new",
    "pyhrv",
]

# load_datasets_concat is a project helper; judging by the names it is unpacked
# into, it returns training features, training labels and test features.
X_train, y_train, X_test = load_datasets_concat(data, data_path="../data")

In [27]:
# List every training feature that has no valid value at all.
# One frame-wide pass yields a boolean per column (True = entirely null).
entirely_null = X_train.isnull().all()
for feature_name, is_empty in entirely_null.items():
    if is_empty:
        print(feature_name)

hrv/nn/tinn
hrv/rr/tinn


In [28]:
# Features that are NaN for every training row carry no information. Drop them
# from the training frame and remove the same columns from the test frame, so
# both splits keep an identical feature set (previously only X_train was
# reduced, leaving X_test with extra columns).
columns_before = X_train.columns
X_train = X_train.dropna(axis=1, how='all')
dropped_columns = columns_before.difference(X_train.columns)
# errors='ignore' keeps this cell safe to re-run and tolerant of columns that
# are already absent from X_test.
X_test = X_test.drop(columns=dropped_columns, errors='ignore')

# NOTE(review): replace_infinities is a project helper whose return value is
# unused here, so it is assumed to patch +/-inf values in both frames in
# place — confirm against util.
replace_infinities(X_train, X_test)

# Any remaining missing training values are replaced with 0.
X_train = X_train.fillna(0)

In [29]:
# Display the feature names that remain after cleaning (sanity check).
X_train.columns

Index(['entropy/flipped/binned_entropy', 'entropy/flipped/fourier_entropy',
       'entropy/flipped/permutation_entropy', 'global/flipped/raw_global_min',
       'global/flipped/raw_global_max', 'global/flipped/raw_global_mean',
       'global/flipped/raw_global_median', 'global/flipped/raw_global_std',
       'global/flipped/raw_global_skew', 'global/flipped/raw_global_kurtosis',
       ...
       'pyhrv/sdnn', 'pyhrv/rmssd', 'pyhrv/sdsd', 'pyhrv/nn50', 'pyhrv/pnn50',
       'pyhrv/nn20', 'pyhrv/pnn20', 'pyhrv/sd1', 'pyhrv/sd2',
       'pyhrv/sd_ratio'],
      dtype='object', length=362)

In [30]:
# Univariate relevance of every feature with respect to the label column 'y'.
# The resulting table has one row per feature with its type, p-value and a
# boolean `relevant` flag; the work is spread over 4 worker processes.
feature_rel = tsfresh.feature_selection.relevance.calculate_relevance_table(
    X_train,
    y_train['y'],
    n_jobs=4,
)

In [7]:
# Rank features from most to least significant (smallest p-value first);
# features without a p-value (constant columns) end up at the bottom.
feature_rel.sort_values(by='p_value')

Unnamed: 0_level_0,feature,type,p_value,relevant
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hrv/rr/nni_50,hrv/rr/nni_50,real,0.000000e+00,True
hrv/rr/pnni_50,hrv/rr/pnni_50,real,0.000000e+00,True
hrv/rr/nni_20,hrv/rr/nni_20,real,4.077330e-269,True
hrv/rr/pnni_20,hrv/rr/pnni_20,real,1.369784e-262,True
hrv/rr/cvsd,hrv/rr/cvsd,real,1.714915e-254,True
...,...,...,...,...
morph_new/Slope_QTO_mean,morph_new/Slope_QTO_mean,real,5.379944e-01,False
tsfresh/stand_flip/mean_second_derivative_central,tsfresh/stand_flip/mean_second_derivative_central,real,5.907617e-01,False
tsfresh/stand_flip/variance_larger_than_standard_deviation,tsfresh/stand_flip/variance_larger_than_standa...,constant,,False
hrv/nn/vlf,hrv/nn/vlf,constant,,False


In [31]:
# mRMR (minimum redundancy, maximum relevance) selection of 50 features;
# the selected feature names are handed to write_json under 'k50.json'.
k50 = mrmr_classif(
    X=X_train,
    y=y_train['y'],
    K=50,
)
write_json(k50, 'k50.json')

100%|██████████| 50/50 [00:04<00:00, 12.15it/s]


In [32]:
# mRMR (minimum redundancy, maximum relevance) selection of 100 features;
# the selected feature names are handed to write_json under 'k100.json'.
k100 = mrmr_classif(
    X=X_train,
    y=y_train['y'],
    K=100,
)
write_json(k100, 'k100.json')

100%|██████████| 100/100 [00:07<00:00, 13.82it/s]


In [33]:
# mRMR (minimum redundancy, maximum relevance) selection of 150 features;
# the selected feature names are handed to write_json under 'k150.json'.
k150 = mrmr_classif(
    X=X_train,
    y=y_train['y'],
    K=150,
)
write_json(k150, 'k150.json')

100%|██████████| 150/150 [00:10<00:00, 14.26it/s]


In [34]:
# mRMR (minimum redundancy, maximum relevance) selection of 200 features;
# the selected feature names are handed to write_json under 'k200.json'.
k200 = mrmr_classif(
    X=X_train,
    y=y_train['y'],
    K=200,
)
write_json(k200, 'k200.json')

100%|██████████| 200/200 [00:13<00:00, 15.34it/s]


In [35]:
# mRMR (minimum redundancy, maximum relevance) selection of 250 features;
# the selected feature names are handed to write_json under 'k250.json'.
k250 = mrmr_classif(
    X=X_train,
    y=y_train['y'],
    K=250,
)
write_json(k250, 'k250.json')

100%|██████████| 250/250 [00:15<00:00, 16.31it/s]
