# Alternative 2 - On a given feature set

In [1]:
import pandas as pd
import numpy as np
import keras
import tensorflow as tf

In [2]:
# Read the feature table from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="./datasets/df_tsfresh_features.tar.gz")
df.head(n=5)

Unnamed: 0,time,act,sleep_phase,hr,pid,act__variance_larger_than_standard_deviation,act__has_duplicate_max,act__has_duplicate_min,act__has_duplicate,act__sum_values,...,hr__permutation_entropy__dimension_6__tau_1,hr__permutation_entropy__dimension_7__tau_1,hr__query_similarity_count__query_None__threshold_0.0,"hr__matrix_profile__feature_""min""__threshold_0.98","hr__matrix_profile__feature_""max""__threshold_0.98","hr__matrix_profile__feature_""mean""__threshold_0.98","hr__matrix_profile__feature_""median""__threshold_0.98","hr__matrix_profile__feature_""25""__threshold_0.98","hr__matrix_profile__feature_""75""__threshold_0.98",hr__mean_n_absolute_max__number_of_maxima_7
0,1,0.0,0.0,73.0,1,0.0,1.0,1.0,1.0,0.0,...,,,,,,,,,,
1,2,0.0,0.0,75.0,1,0.0,1.0,1.0,1.0,0.0,...,,,,,,,,,,
2,3,0.0,0.0,76.0,1,0.0,1.0,1.0,1.0,0.0,...,,,,,,,,,,
3,4,0.0,0.0,75.0,1,1.0,0.0,1.0,1.0,85.0,...,,,,,,,,,,
4,5,85.0,0.0,80.0,1,1.0,0.0,1.0,1.0,85.0,...,-0.0,,,,,,,,,


In [3]:
# Distinct label codes present in the `sleep_phase` column.
df["sleep_phase"].unique()
# Label legend as written down by the notebook author:
# 0 -> Wake
# 1 -> phase 1 (light sleep)
# 2 -> phase 2 (deep sleep N1)
# 3 -> phase 3 (deep sleep N2)
# 4 -> NREM    ()
# 5 -> REM
# NOTE(review): this legend cannot be verified from the notebook itself (the
# description of code 4 is empty) -- confirm against the dataset documentation.

array([0., 1., 2., 5., 3., 4.])

In [4]:
# Binary target: True for any non-zero sleep phase code, False for code 0.
df["bin_sleep_phase"] = df["sleep_phase"].gt(0)

In [None]:
"""
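- Model input (2, S, 13): [
                          [hr_0, hr_1, hr_2   ....hr_N]
                          [act_0, act_1, act_2....act_N]
                       ]
  (Note: generate_XY(..., allign_cols=True) below actually yields X with shape
   (S, 2, 778), with the act row first and the hr row second.)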
- Model output:
    - (S, 1) (bin sleep phase)
    - (S, X) (sleep_)

"""

### Get XY from dataframe

In [85]:
def generate_XY(df, ycol="bin_sleep_phase", allign_cols=True):
    """Build the model inputs ``X`` and the targets ``Y`` from a feature dataframe.

    Feature columns are selected by name prefix (``hr_`` and ``act_``) and each
    group is sorted alphabetically so the column order is deterministic.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``hr_*`` / ``act_*`` feature columns and ``ycol``.
    ycol : str, default "bin_sleep_phase"
        Name of the target column.
    allign_cols : bool, default True
        If True, stack the two feature groups into a ``(S, 2, F)`` array where
        row 0 holds the ``act_*`` features and row 1 the ``hr_*`` features.
        If False, return a flat ``(S, F_hr + F_act)`` array with the ``hr_*``
        columns first, followed by the ``act_*`` columns.

    Returns
    -------
    X : numpy.ndarray
        Feature array laid out as described above.
    Y : numpy.ndarray
        Target values with shape ``(S, 1)``.

    Raises
    ------
    ValueError
        If ``allign_cols`` is True and the number of ``hr_*`` columns differs
        from the number of ``act_*`` columns (they could not be stacked).
    """
    # This could be used in several different ways
    # e.g., removing nan cols, inputing averages, etc
    # Here every missing or infinite value is simply replaced by zero.
    df = df.fillna(0.0)
    df = df.replace([np.inf, -np.inf], 0)

    hr_cols = sorted(k for k in df.keys() if k.startswith("hr_"))
    act_cols = sorted(k for k in df.keys() if k.startswith("act_"))

    if allign_cols:
        # Fail early with an explicit message instead of an opaque np.stack error.
        if len(hr_cols) != len(act_cols):
            raise ValueError(
                "allign_cols=True needs the same number of 'hr_' and 'act_' "
                "columns, got %d and %d" % (len(hr_cols), len(act_cols))
            )
        hr = df[hr_cols].values
        act = df[act_cols].values
        # Stacking on axis=1 yields (S, 2, F) directly: act first, then hr.
        X = np.stack((act, hr), axis=1)
    else:
        X = df[hr_cols + act_cols].values

    Y = df[ycol].values.reshape(-1, 1)

    return X, Y


In [86]:
# generate_XY(df, allign_cols=True)   #  X.shape is (205161, 2, 778)
# generate_XY(df, allign_cols=False)  #  X.shape is (205161, 1556)

In [98]:
# Build one (X, Y) tuple per participant (`pid`), so that whole participants
# can later be assigned to a single data split.
allign_cols = True
df_XY = df.groupby("pid").apply(generate_XY, allign_cols=allign_cols)
df_XY.head()

pid
1     ([[[ 0.   0.   0.   0.   0.   0.   0.   0.   0...
16    ([[[ 0.   0.   0.   0.   0.   0.   0.   0.   0...
21    ([[[ 8.50000000e+02  2.70000000e+01  1.6000000...
28    ([[[ 0.   0.   0.   0.   0.   0.   0.   0.   0...
33    ([[[ 0.   0.   0.   0.   0.   0.   0.   0.   0...
dtype: object

In [99]:
# Spot-check one participant: shapes of its feature array and its target array.
idx = 3
X_sample, Y_sample = df_XY.iloc[idx]
X_sample.shape, Y_sample.shape

((625, 2, 778), (625, 1))

In [100]:
# Collect every participant's (X, Y) pair into two parallel 1-D object arrays,
# one element per participant, so they can be indexed by lists of subject ids.
#
# The arrays are pre-allocated and filled element by element on purpose:
# np.array(list_of_arrays, dtype=object) tries to infer a common shape and
# would build a multi-dimensional array (or fail) whenever participants happen
# to have the same number of samples.
n_subjects = len(df_XY)
xs = np.empty(n_subjects, dtype=object)
ys = np.empty(n_subjects, dtype=object)
for i, (x, y) in enumerate(df_XY):
    xs[i] = x
    ys[i] = y


In [101]:
# A simple train set can now be assembled per subject, which guarantees that
# data from one subject is NOT in the training and test sets at the same time.
subjects_train_idx = [1, 2, 3, 4]
X_demo = np.vstack(xs[subjects_train_idx])
Y_demo = np.vstack(ys[subjects_train_idx])
X_demo.shape, Y_demo.shape

((3500, 2, 778), (3500, 1))

In [185]:
# Subject-wise split: each participant contributes to exactly one of the
# train / validation / test sets. The index ranges are positions in `xs`/`ys`.
subjects_train_idx = range(100)
subjects_val_idx = range(100, 150)   # kept separate from the test indices
subjects_test_idx = range(150, 200)

X_train = np.vstack(xs[subjects_train_idx])
Y_train = np.vstack(ys[subjects_train_idx])

X_val = np.vstack(xs[subjects_val_idx])
Y_val = np.vstack(ys[subjects_val_idx])

X_test = np.vstack(xs[subjects_test_idx])
Y_test = np.vstack(ys[subjects_test_idx])


### Evaluate a few models

In [103]:
def simple_dense_model(allign_cols=True, n_features=778):
    """Build and compile a small fully-connected binary classifier.

    Parameters
    ----------
    allign_cols : bool, default True
        Input layout. True expects samples of shape ``(2, n_features)``;
        False expects flat samples of shape ``(2 * n_features,)``.
    n_features : int, default 778
        Number of features per signal.

    Returns
    -------
    tf.keras.Model
        Sequential model (Dense 12 -> Dense 8 -> Dense 1 sigmoid) compiled with
        binary cross-entropy, the Adam optimizer and the accuracy metric.
    """
    model = tf.keras.models.Sequential()
    if allign_cols:
        model.add(tf.keras.Input(shape=(2, n_features)))
        # Dense only acts on the last axis, so without flattening the network
        # would emit one probability per input row (batch, 2, 1) instead of a
        # single probability per sample.
        model.add(tf.keras.layers.Flatten())
    else:
        model.add(tf.keras.Input(shape=(2 * n_features,)))

    model.add(tf.keras.layers.Dense(12, activation='relu'))
    model.add(tf.keras.layers.Dense(8, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

simple_model = simple_dense_model()
  

In [93]:
# Allign_cols = False
# NOTE(review): when the notebook is run top to bottom, `simple_model` and
# `X_train` both come from the allign_cols=True setup above, so this label
# looks stale -- confirm which layout this run is meant to cover.
with tf.device('/cpu:0'):
    # Stop once val_loss has not improved for 3 consecutive epochs.
    early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    # Train on the training subjects, monitoring the validation subjects.
    history = simple_model.fit(X_train, Y_train,
                               validation_data=(X_val, Y_val), 
                               epochs=50, 
                               batch_size=8,
                               callbacks=[early_stop_callback])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50


In [95]:
# Score the dense model on the held-out test subjects (computation pinned to the CPU).
with tf.device('/cpu:0'):
    simple_model.evaluate(x=X_test, y=Y_test)



In [104]:
# Allign_cols = True
# Start from a freshly initialised network: `fit` continues from the current
# weights, so reusing a model trained by an earlier cell would make this run
# depend on the order in which the cells were executed.
simple_model = simple_dense_model(allign_cols=True)

with tf.device('/cpu:0'):
    # Stop once val_loss has not improved for 3 consecutive epochs.
    early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    history = simple_model.fit(X_train, Y_train,
                               validation_data=(X_val, Y_val),
                               epochs=50,
                               batch_size=8,
                               callbacks=[early_stop_callback])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


In [105]:
# Score the dense model on the held-out test subjects (computation pinned to the CPU).
with tf.device('/cpu:0'):
    simple_model.evaluate(x=X_test, y=Y_test)



In [116]:
def cnn_lstm_model(cnn_d = 10, lstm_d = 4):
    """Build and compile a Conv1D -> ReLU -> Dropout -> LSTM -> sigmoid classifier.

    Parameters
    ----------
    cnn_d : int, default 10
        Number of filters of the Conv1D layer.
    lstm_d : int, default 4
        Number of units of the LSTM layer.

    Returns
    -------
    tf.keras.Model
        Sequential model compiled with binary cross-entropy, the Adam optimizer
        and the accuracy metric.
    """
    layers = tf.keras.layers
    stack = [
        layers.Conv1D(cnn_d, kernel_size=(3,), padding='same'),
        # Batch Normalization was resulting into NAN due to vanishing coefficients
        # layers.BatchNormalization(epsilon=1e-04, axis=-1, momentum=0.9),
        layers.Activation(tf.nn.relu),
        layers.Dropout(0.1),
        # Only the last LSTM output is kept and fed to the output unit.
        layers.LSTM(lstm_d, return_sequences=False),
        layers.Dense(1, activation="sigmoid", name='output'),
    ]
    model = tf.keras.models.Sequential(stack)

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

cnnlstm_model = cnn_lstm_model()

In [117]:
# Train the CNN-LSTM model on the raw (un-normalised) features, on the CPU.
with tf.device('/cpu:0'):
    # Stop once val_loss has not improved for 3 consecutive epochs.
    early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    history = cnnlstm_model.fit(X_train, Y_train, 
                               validation_data=(X_val, Y_val), 
                               epochs=50, 
                               batch_size=8,
                               callbacks=[early_stop_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


In [118]:
# Score the CNN-LSTM model on the held-out test subjects (computation pinned to the CPU).
with tf.device('/cpu:0'):
    cnnlstm_model.evaluate(x=X_test, y=Y_test)



# What if we perform some feature normalization?

In [186]:
from sklearn.preprocessing import MinMaxScaler


def _scale_features(scaler, X, fit=False):
    """Run `scaler` on X flattened to (n_samples, -1) and restore X's shape.

    When `fit` is True the scaler is fitted on X first (`fit_transform`);
    otherwise the already-fitted scaler is only applied (`transform`).
    """
    flat = X.reshape(X.shape[0], -1)
    scaled = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return scaled.reshape(X.shape)


print(X_train.shape)
print(X_train.min().round(5), X_train.max().round(5))  # raw feature range

# The scaler is fitted on the training data only and then re-used unchanged
# for the validation and test data.
scaler = MinMaxScaler(feature_range=(-1,1))
X_train_norm = _scale_features(scaler, X_train, fit=True)
X_val_norm = _scale_features(scaler, X_val)
X_test_norm = _scale_features(scaler, X_test)

print(X_train_norm.shape)
print(X_train_norm.min().round(5), X_train_norm.max().round(5))  # -1, 1 by construction
# The held-out sets use the train min/max, so they can fall outside [-1, 1].
print(X_val_norm.min().round(5), X_val_norm.max().round(5))
print(X_test_norm.min().round(5), X_test_norm.max().round(5))


(102759, 2, 778)
-5.388638848904246e+20 303367410523114.6
(102759, 2, 778)
-1.0 1.0
-26.98382 25.24706
-1.97458 5.48472


In [187]:
# Allign_cols = True
# Re-create the network before training on the normalised features: `fit`
# continues from the current weights, so reusing the model already trained on
# the raw features would confound the raw-vs-normalised comparison.
simple_model = simple_dense_model(allign_cols=True)

with tf.device('/cpu:0'):
    # Stop once val_loss has not improved for 3 consecutive epochs.
    early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    history = simple_model.fit(X_train_norm, Y_train,
                               validation_data=(X_val_norm, Y_val),
                               epochs=50,
                               batch_size=8,
                               callbacks=[early_stop_callback])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


In [189]:
# Score the dense model on the normalised test features (computation pinned to the CPU).
with tf.device('/cpu:0'):
    simple_model.evaluate(x=X_test_norm, y=Y_test)



In [192]:
# Predict on the normalised test features and print the smallest and largest
# predicted value as a quick sanity check of the output range.
with tf.device('/cpu:0'):
    p = simple_model.predict(X_test_norm)
    
print(p.min(), p.max())

0.0024554483 0.93269855


In [193]:
# Allign_cols = True
# Re-create the network before training on the normalised features: `fit`
# continues from the current weights, so reusing the model already trained on
# the raw features would confound the raw-vs-normalised comparison.
cnnlstm_model = cnn_lstm_model()

with tf.device('/cpu:0'):
    # Stop once val_loss has not improved for 3 consecutive epochs.
    early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    history = cnnlstm_model.fit(X_train_norm, Y_train,
                                validation_data=(X_val_norm, Y_val),
                                epochs=50,
                                batch_size=8,
                                callbacks=[early_stop_callback])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50


In [194]:
# Score the CNN-LSTM model on the normalised test features (computation pinned to the CPU).
with tf.device('/cpu:0'):
    cnnlstm_model.evaluate(x=X_test_norm, y=Y_test)



In [None]:
# Predict on the normalised test features and print the smallest and largest
# predicted value as a quick sanity check of the output range.
with tf.device('/cpu:0'):
    p = cnnlstm_model.predict(X_test_norm)
    
print(p.min(), p.max())