### Feature Extraction with Decision Tree Classification

Labels come from 2 different PSG systems: Remlogic and Somnomedics. Sleep stage labels are as follows:

0 = Awake = SLEEP-S0 \
1 = REM = SLEEP-REM \
2 = Light = SLEEP-S1 and SLEEP-S2 \
3 = Deep = SLEEP-S3

EMFIT is the piezoelectric mat and Somnofy is the radar.

In [31]:
#! pip3 install tensorflow
#!pip3 install tsfresh

In [58]:
import os
from loader import get_nn_patients
import pandas as pd
import numpy as np 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
from tensorflow.keras.layers import Dense, Conv1D, BatchNormalization, MaxPooling1D, Flatten, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [3]:
radars, mats, _, _, y = get_nn_patients()
R_train, R_test, RY_train, RY_test = train_test_split(radars, y, test_size=0.30, random_state=42)
M_train, M_test, MY_train, MY_test = train_test_split(mats, y, test_size=0.30, random_state=42)
R_train.shape, R_test.shape

((35, 857), (15, 857))

In [56]:
temp = []
labels = []
for id in range(R_train.shape[0]):
    for ind, val in enumerate(R_train[id]):
        temp.append([id, val, M_train[id][ind]])
        labels.append(RY_train[id][ind])
train = pd.DataFrame(temp, columns=['id', 'radar', 'mat'])
label = pd.Series(labels)
train.head(), label.head(), type(label)

(   id  radar  mat
 0   0    0.0  0.0
 1   0    0.0  0.0
 2   0    0.0  0.0
 3   0    0.0  0.0
 4   0    0.0  0.0,
 0    0.0
 1    0.0
 2    0.0
 3    0.0
 4    0.0
 dtype: float64,
 pandas.core.series.Series)

In [34]:
from tsfresh import extract_features
features = extract_features(train, column_id="id")

Feature Extraction: 100%|███████████████████████| 24/24 [00:13<00:00,  1.75it/s]


In [35]:
features

Unnamed: 0,mat__variance_larger_than_standard_deviation,mat__has_duplicate_max,mat__has_duplicate_min,mat__has_duplicate,mat__sum_values,mat__abs_energy,mat__mean_abs_change,mat__mean_change,mat__mean_second_derivative_central,mat__median,...,radar__permutation_entropy__dimension_6__tau_1,radar__permutation_entropy__dimension_7__tau_1,radar__query_similarity_count__query_None__threshold_0.0,"radar__matrix_profile__feature_""min""__threshold_0.98","radar__matrix_profile__feature_""max""__threshold_0.98","radar__matrix_profile__feature_""mean""__threshold_0.98","radar__matrix_profile__feature_""median""__threshold_0.98","radar__matrix_profile__feature_""25""__threshold_0.98","radar__matrix_profile__feature_""75""__threshold_0.98",radar__mean_n_absolute_max__number_of_maxima_7
0,1.0,1.0,1.0,1.0,1369.0,3069.0,0.03271,0.0,0.0,2.0,...,0.607997,0.743281,,2.36509,13.416408,6.825311,6.570318,4.893899,7.955122,3.0
1,0.0,1.0,1.0,1.0,1722.0,4020.0,0.035047,0.0,0.0,2.0,...,0.718023,0.873692,,2.004967,11.135529,6.622,6.435964,5.147708,7.235359,3.0
2,0.0,1.0,1.0,1.0,1458.0,2926.0,0.030374,0.0,0.0,2.0,...,0.789567,0.968877,,3.52099,13.416408,8.189044,8.0816,7.157145,9.383813,3.0
3,1.0,1.0,1.0,1.0,1187.0,2563.0,0.025701,0.0,0.0,2.0,...,0.577279,0.701743,,2.767194,17.204651,8.467412,9.20148,7.48182,10.222177,3.0
4,0.0,1.0,1.0,1.0,1638.0,3486.0,0.03271,0.0,0.0,2.0,...,0.843673,1.02729,,2.625752,13.416408,7.3469,7.233772,6.560281,8.175801,3.0
5,0.0,1.0,1.0,1.0,1596.0,3444.0,0.03271,0.0,0.0,2.0,...,0.533475,0.64845,,2.389912,13.114877,5.970267,5.151349,4.06067,7.593417,3.0
6,0.0,1.0,1.0,1.0,1407.0,2977.0,0.033879,0.001168,0.0,2.0,...,0.597606,0.718362,,3.162693,13.856406,7.16032,6.650289,5.948319,8.398089,3.0
7,0.0,1.0,1.0,1.0,1309.0,2779.0,0.028037,0.0,0.0,2.0,...,0.448421,0.53733,,2.610421,12.184808,5.655788,5.156136,4.225133,6.913472,3.0
8,0.0,1.0,1.0,1.0,1562.0,3192.0,0.026869,-0.001168,0.0,2.0,...,0.634444,0.756228,,2.288742,13.56466,6.433854,6.033228,5.063763,7.688837,3.0
9,0.0,1.0,1.0,1.0,1679.0,3631.0,0.025701,0.0,0.0,2.0,...,0.805729,0.968707,,3.451881,13.198633,9.85333,10.279066,8.640224,11.505369,3.0


In [57]:
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import select_features

impute(features)

filtered_features = select_features(features, label)
filtered_features

AssertionError: X and y must contain the same number of samples.