### Feature Extraction with Decision Tree Classification

Labels come from 2 different PSG systems: Remlogic and Somnomedics. Sleep stage labels are as follows:

0 = Awake = SLEEP-S0 \
1 = REM = SLEEP-REM \
2 = Light = SLEEP-S1 and SLEEP-S2 \
3 = Deep = SLEEP-S3

EMFIT is the piezoelectric mat and Somnofy is the radar.

In [1]:
#! pip3 install tensorflow
#!pip3 install tsfresh

In [65]:
import os
from loader import *
from constants import *
from helpers import *
import pandas as pd
import numpy as np 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [66]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, classification_report, balanced_accuracy_score

from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import select_features

In [77]:
data = []
for subjectId in PARTICIPANT_IDS:
    sleep_stages = read_patient_data(subjectId)
    augmented = augment_data(sleep_stages)
    augmented = augmented.reset_index()
    augmented.drop('index', axis=1, inplace=True)
    augmented = augmented.reset_index()
    augmented = augmented.rename(columns={'index': 'timestamp'})
    for row in range(augmented.shape[0]):
        data.append([row, augmented["sleep_stage_num_somnofy"][row],
                    augmented["sleep_stage_num_emfit"][row],
                    augmented["sleep_stage_num_psg"][row]])

df = pd.DataFrame(data, columns=["id", 'radar', 'mat', "psg"])

### no grouping (DOESNT WORK)

In [81]:
x = df[["radar", "mat"]].reset_index()
features = x.rename(columns={'index': 'id'})
y = df["psg"]

### group by each timestamp mixing patient

In [68]:
df = df.sort_values('id')
y = []
for i in range(MEAN_SIZE):
    y.append(round(df[df["id"]==i]["psg"].mean()))
x = df[["id", "radar", "mat"]]
y = pd.Series(y)

### group by a group of timestamp 

## Feature Extraction and Decision Tree

In [80]:
features = extract_features(x, column_id="id")

Feature Extraction:   0%|                                | 0/30 [00:03<?, ?it/s]


IndexError: index -1 is out of bounds for axis 0 with size 0

In [70]:
features

Unnamed: 0,radar__variance_larger_than_standard_deviation,radar__has_duplicate_max,radar__has_duplicate_min,radar__has_duplicate,radar__sum_values,radar__abs_energy,radar__mean_abs_change,radar__mean_change,radar__mean_second_derivative_central,radar__median,...,mat__permutation_entropy__dimension_6__tau_1,mat__permutation_entropy__dimension_7__tau_1,mat__query_similarity_count__query_None__threshold_0.0,"mat__matrix_profile__feature_""min""__threshold_0.98","mat__matrix_profile__feature_""max""__threshold_0.98","mat__matrix_profile__feature_""mean""__threshold_0.98","mat__matrix_profile__feature_""median""__threshold_0.98","mat__matrix_profile__feature_""25""__threshold_0.98","mat__matrix_profile__feature_""75""__threshold_0.98",mat__mean_n_absolute_max__number_of_maxima_7
0,1.0,1.0,1.0,1.0,36.0,86.0,1.020408,-0.040816,-0.010417,0.0,...,3.308143,3.532136,,0.864899,3.133129,2.08366,2.200956,1.650986,2.461830,2.714286
1,1.0,1.0,1.0,1.0,37.0,87.0,1.040816,0.061224,0.000000,0.0,...,3.394550,3.583257,,,,,,,,2.714286
2,1.0,1.0,1.0,1.0,38.0,90.0,0.979592,0.000000,0.000000,0.0,...,3.494520,3.689670,,,,,,,,2.714286
3,1.0,1.0,1.0,1.0,34.0,82.0,0.857143,-0.040816,0.020833,0.0,...,3.284887,3.532136,,,,,,,,2.714286
4,1.0,1.0,1.0,1.0,36.0,92.0,1.204082,-0.020408,0.010417,0.0,...,3.382922,3.689670,,,,,,,,2.714286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,0.0,0.0,1.0,1.0,33.0,65.0,0.897959,0.000000,-0.031250,0.0,...,3.236440,3.563643,,,,,,,,2.714286
853,0.0,0.0,1.0,1.0,30.0,58.0,0.979592,-0.040816,0.020833,0.0,...,3.413729,3.646271,,,,,,,,2.714286
854,0.0,0.0,1.0,1.0,31.0,61.0,0.918367,0.020408,0.010417,0.0,...,3.259696,3.508352,,2.076225,3.107302,2.67914,2.752883,2.511029,2.878699,2.857143
855,0.0,0.0,1.0,1.0,31.0,61.0,0.897959,0.000000,-0.010417,0.0,...,3.567761,3.752683,,,,,,,,2.714286


In [71]:
impute(features)

filtered_features = select_features(features, y)
filtered_features

 'radar__fft_coefficient__attr_"real"__coeff_27'
 'radar__fft_coefficient__attr_"real"__coeff_28'
 'radar__fft_coefficient__attr_"real"__coeff_29'
 'radar__fft_coefficient__attr_"real"__coeff_30'
 'radar__fft_coefficient__attr_"real"__coeff_31'
 'radar__fft_coefficient__attr_"real"__coeff_32'
 'radar__fft_coefficient__attr_"real"__coeff_33'
 'radar__fft_coefficient__attr_"real"__coeff_34'
 'radar__fft_coefficient__attr_"real"__coeff_35'
 'radar__fft_coefficient__attr_"real"__coeff_36'
 'radar__fft_coefficient__attr_"real"__coeff_37'
 'radar__fft_coefficient__attr_"real"__coeff_38'
 'radar__fft_coefficient__attr_"real"__coeff_39'
 'radar__fft_coefficient__attr_"real"__coeff_40'
 'radar__fft_coefficient__attr_"real"__coeff_41'
 'radar__fft_coefficient__attr_"real"__coeff_42'
 'radar__fft_coefficient__attr_"real"__coeff_43'
 'radar__fft_coefficient__attr_"real"__coeff_44'
 'radar__fft_coefficient__attr_"real"__coeff_45'
 'radar__fft_coefficient__attr_"real"__coeff_46'
 'radar__fft_coeffic

Unnamed: 0,radar__sum_of_reoccurring_data_points,radar__sum_values,"radar__fft_coefficient__attr_""abs""__coeff_0",radar__mean,"radar__fft_coefficient__attr_""real""__coeff_0",radar__median,radar__c3__lag_1,radar__c3__lag_2,radar__c3__lag_3,mat__quantile__q_0.4,...,"radar__agg_linear_trend__attr_""intercept""__chunk_len_10__f_agg_""var""","mat__fft_coefficient__attr_""abs""__coeff_7",radar__spkt_welch_density__coeff_8,"radar__cwt_coefficients__coeff_9__w_5__widths_(2, 5, 10, 20)",radar__last_location_of_maximum,"radar__fft_coefficient__attr_""real""__coeff_23",radar__quantile__q_0.8,"mat__change_quantiles__f_agg_""var""__isabs_False__qh_0.6__ql_0.0","radar__cwt_coefficients__coeff_1__w_10__widths_(2, 5, 10, 20)","mat__fft_coefficient__attr_""abs""__coeff_12"
0,35.0,36.0,36.0,0.72,36.0,0.0,0.416667,0.000000,1.295455,0.0,...,1.208,1.851508,3.535839,-0.473988,0.76,3.810992,2.0,0.000000,1.091171,3.569997
1,37.0,37.0,37.0,0.74,37.0,0.0,0.375000,0.000000,0.090909,0.0,...,1.078,5.922581,2.187060,0.415523,1.00,-5.268613,2.0,0.000000,2.125077,10.254692
2,37.0,38.0,38.0,0.76,38.0,0.0,0.666667,0.782609,1.000000,0.0,...,1.296,9.311151,4.354857,2.122977,0.74,-4.324517,2.0,0.230624,-0.829366,2.057397
3,33.0,34.0,34.0,0.68,34.0,0.0,0.250000,0.782609,0.681818,0.0,...,0.656,5.967535,4.183231,0.691868,0.86,-1.172671,2.0,0.357143,1.345568,12.202121
4,35.0,36.0,36.0,0.72,36.0,0.0,0.000000,0.782609,0.000000,0.0,...,1.374,6.644963,2.051882,-0.106822,0.96,-2.752017,2.0,0.390000,1.473370,1.205129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,30.0,33.0,33.0,0.66,33.0,0.0,0.166667,0.173913,0.181818,0.0,...,0.820,7.224493,4.765229,0.073979,0.56,-7.248102,2.0,0.000000,1.665392,12.160424
853,27.0,30.0,30.0,0.60,30.0,0.0,0.000000,0.086957,0.136364,0.0,...,0.776,4.175335,3.475062,-0.035569,0.44,-4.338834,2.0,0.000000,-0.268628,9.327336
854,28.0,31.0,31.0,0.62,31.0,0.0,0.041667,0.065217,0.000000,0.0,...,0.630,4.085775,1.391392,0.586212,0.92,-8.191178,2.0,0.000000,0.250800,8.608425
855,28.0,31.0,31.0,0.62,31.0,0.0,0.166667,0.086957,0.045455,0.0,...,1.118,5.980501,0.546476,0.922813,0.08,0.800294,2.0,0.000000,0.284416,11.580026


In [92]:
# train and test split
X_feature_train, X_feature_test, y_train, y_test = train_test_split(features, y, test_size=.33)
#X_filtered_train, X_filtered_test = X_feature_train[filtered_features.columns], X_feature_test[filtered_features.columns]


In [93]:
classifier_feature = DecisionTreeClassifier()
classifier_feature.fit(X_feature_train, y_train)
preds = classifier_feature.predict(X_feature_test)

In [94]:
print(classification_report(y_test,preds))
print(balanced_accuracy_score(y_test,preds))

              precision    recall  f1-score   support

         0.0       0.88      0.88      0.88      3191
         1.0       0.94      0.94      0.94      2234
         2.0       0.90      0.90      0.90      6591
         3.0       0.89      0.90      0.89      2125

    accuracy                           0.90     14141
   macro avg       0.90      0.90      0.90     14141
weighted avg       0.90      0.90      0.90     14141

0.9047014362765377
