# Dreem Project - Deep Learning - TF & Keras - Basile NOUVELLET

## Check if GPU is used

In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8909690045639871536
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 9604628808204645324
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 16661797590608488637
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 11330115994
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6979578593716129426
physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:1e.0, compute capability: 3.7"
]


## Data

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
df_train_final = pd.read_hdf('df_train_final.h5', 'df_train_final')

var_to_pred = 'SO'
X_train, X_val, y_train, y_val = train_test_split(df_train_final.loc[:, df_train_final.columns != var_to_pred],
                                                  df_train_final[var_to_pred],
                                                  test_size=0.10,
                                                  random_state=0,
                                                  stratify=df_train_final[var_to_pred])
y_train = pd.DataFrame(y_train)
y_val = pd.DataFrame(y_val)

In [4]:
df_train_final.head()

Unnamed: 0,num_pso,mean_amp_pso,mean_dur_pso,amp_cso,dur_cso,time_since_sleep,time_in_ds,time_in_ls,time_in_rs,time_in_ws,...,wawelets_86,wawelets_87,wawelets_88,wawelets_89,wawelets_90,wawelets_91,wawelets_92,wawelets_93,wawelets_94,wawelets_95
0,237.0,152.658761,341.523207,128.017491,429.0,11379.0,2730.0,3780.0,0.0,480.0,...,457,-1.508171,-0.600457,0.605929,1.524894,0.01117,-0.000105,1.006676,1.013396,0.740114
1,176.0,146.883435,338.039773,119.130849,196.0,6721.0,2580.0,2100.0,0.0,480.0,...,453,-1.302524,-0.578372,0.553759,1.361592,-0.032801,0.001091,0.845156,0.714289,0.66125
2,456.0,152.376541,335.629386,164.29258,417.0,26832.0,3240.0,7440.0,2130.0,750.0,...,462,-1.774095,-0.736667,0.757092,1.778728,0.01614,-0.001139,1.099851,1.209672,0.872444
3,21.0,139.720772,336.285714,159.237082,407.0,1289.0,0.0,60.0,0.0,450.0,...,460,-1.514035,-0.667986,0.661924,1.535148,-0.005341,0.000916,0.961756,0.924975,0.773179
4,72.0,140.649432,349.875,130.184278,297.0,2262.0,630.0,960.0,0.0,450.0,...,452,-1.425063,-0.590922,0.636247,1.404894,0.00158,-0.002912,0.872183,0.760704,0.70121


In [5]:
features = list(df_train_final.columns)

equations = {
     'base_model': {'predictors': features[0:13]},
     'entropy_fft': {'predictors': features[0:81]},
     'dummy_variables_only': {'predictors': features[0:11] + features[81:83]},
     'dummy_variables': {'predictors': features[0:83]},
     'fft_wawelets' : {'predictors': features[0:81] + features[85:181]}
}

features

['num_pso',
 'mean_amp_pso',
 'mean_dur_pso',
 'amp_cso',
 'dur_cso',
 'time_since_sleep',
 'time_in_ds',
 'time_in_ls',
 'time_in_rs',
 'time_in_ws',
 'mean',
 'max',
 'min',
 'perm_entropy',
 'svd_entropy',
 'higuchi_fd',
 'detrended_fluctuation',
 'fft_0',
 'fft_1',
 'fft_2',
 'fft_3',
 'fft_4',
 'fft_5',
 'fft_6',
 'fft_7',
 'fft_8',
 'fft_9',
 'fft_10',
 'fft_11',
 'fft_12',
 'fft_13',
 'fft_14',
 'fft_15',
 'fft_16',
 'fft_17',
 'fft_18',
 'fft_19',
 'fft_20',
 'fft_21',
 'fft_22',
 'fft_23',
 'fft_24',
 'fft_25',
 'fft_26',
 'fft_27',
 'fft_28',
 'fft_29',
 'fft_30',
 'fft_31',
 'fft_32',
 'fft_33',
 'fft_34',
 'fft_35',
 'fft_36',
 'fft_37',
 'fft_38',
 'fft_39',
 'fft_40',
 'fft_41',
 'fft_42',
 'fft_43',
 'fft_44',
 'fft_45',
 'fft_46',
 'fft_47',
 'fft_48',
 'fft_49',
 'fft_50',
 'fft_51',
 'fft_52',
 'fft_53',
 'fft_54',
 'fft_55',
 'fft_56',
 'fft_57',
 'fft_58',
 'fft_59',
 'fft_60',
 'fft_61',
 'fft_62',
 'fft_63',
 'curr_sleep_stage_2.0',
 'curr_sleep_stage_3.0',
 'slow

## Special data treatment for Keras

In [6]:
from keras.utils import to_categorical

# Convert labels to categorical one-hot encoding
y_train = to_categorical(y_train.values, num_classes=3)
y_val = to_categorical(y_val.values, num_classes=3)

Using TensorFlow backend.


## Same architecture on all models

In [7]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [8]:
epochs = 5
batch_size = 128

results = pd.DataFrame(columns=['model_name', 'loss', 'accuracy'])

for model_name in equations:
    predictors = equations[model_name]['predictors']

    x_train = X_train[predictors].values
    x_val = X_val[predictors].values

    model = Sequential([
        Dense(units=10, activation='relu', input_dim=x_train.shape[1]),
        Dropout(0.2),
        Dense(units=10, activation='relu'),
        Dropout(0.2),
        Dense(units=10, activation='relu'),
        Dropout(0.2),
        Dense(units=3, activation='softmax'),
    ])
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)

    loss_and_metrics = model.evaluate(x_val, y_val)

    print("Result for %s: %s\n\n" % (model_name, loss_and_metrics))

    results = results.append({
        'model_name': model_name,
        'loss': loss_and_metrics[0],
        'accuracy': loss_and_metrics[1],
    }, ignore_index=True)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Result for base_model: [11.2639201764502, 0.3009478673076907]


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Result for entropy_fft: [11.868644917599926, 0.2636447026448555]


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Result for dummy_variables_only: [1.0747486065513332, 0.4358660755145078]


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Result for dummy_variables: [1.0747563137298803, 0.4358660755145078]


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Result for fft_wawelets: [11.868644917599926, 0.2636447026448555]




In [9]:
results

Unnamed: 0,model_name,loss,accuracy
0,base_model,11.26392,0.300948
1,entropy_fft,11.868645,0.263645
2,dummy_variables_only,1.074749,0.435866
3,dummy_variables,1.074756,0.435866
4,fft_wawelets,11.868645,0.263645


In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 10)                1780      
_________________________________________________________________
dropout_13 (Dropout)         (None, 10)                0         
_________________________________________________________________
dense_18 (Dense)             (None, 10)                110       
_________________________________________________________________
dropout_14 (Dropout)         (None, 10)                0         
_________________________________________________________________
dense_19 (Dense)             (None, 10)                110       
_________________________________________________________________
dropout_15 (Dropout)         (None, 10)                0         
_________________________________________________________________
dense_20 (Dense)             (None, 3)                 33        
Total para

## Separate curr_sleep 2 & 3

In [11]:
def get_train_test_split(df):
    return train_test_split(df.loc[:, df.columns != var_to_pred],
                            df[var_to_pred],
                            test_size=0.10,
                            random_state=0,
                            stratify=df[var_to_pred])

In [12]:
df_train_final_2 = df_train_final[ df_train_final["curr_sleep_stage_2.0"] == 1 ]
df_train_final_3 = df_train_final[ df_train_final["curr_sleep_stage_3.0"] == 1 ]

var_to_pred = 'SO'

# Curr sleep 2
X_train_2, X_val_2, y_train_2, y_val_2 = get_train_test_split(df_train_final_2)

y_train_2 = to_categorical(pd.DataFrame(y_train_2).values, num_classes=3)
y_val_2 = to_categorical(pd.DataFrame(y_val_2).values, num_classes=3)

# Curr sleep 3
X_train_3, X_val_3, y_train_3, y_val_3 = get_train_test_split(df_train_final_3)

y_train_3 = to_categorical(pd.DataFrame(y_train_3).values, num_classes=3)
y_val_3 = to_categorical(pd.DataFrame(y_val_3).values, num_classes=3)

In [13]:
epochs = 60
batch_size = 512

model_name = "fft_wawelets"

predictors = equations[model_name]['predictors']
print("Model is '%s' (%d predictors)\n" % (model_name, len(predictors)))

x_train_2 = X_train_2[predictors].values
x_val_2 = X_val_2[predictors].values

x_train_3 = X_train_3[predictors].values
x_val_3 = X_val_3[predictors].values

architecture = [
    Dense(units=150, activation='relu', input_dim=x_train_2.shape[1]),
    Dropout(0.2),
    Dense(units=100, activation='relu'),
    Dropout(0.3),
    Dense(units=60, activation='relu'),
    Dropout(0.4),
    Dense(units=30, activation='relu'),
    Dropout(0.3),
    Dense(units=3, activation='softmax'),
]

model_2 = Sequential(architecture)
model_3 = Sequential(architecture)

model_2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model_2.fit(x_train_2, y_train_2, epochs=epochs, batch_size=batch_size)
model_3.fit(x_train_3, y_train_3, epochs=epochs, batch_size=batch_size)

loss_and_metrics_2 = model_2.evaluate(x_val_2, y_val_2)
loss_and_metrics_3 = model_3.evaluate(x_val_3, y_val_3)

print("\nResult for %s (2): %s" % (model_name, loss_and_metrics_2))
print("Result for %s (3): %s" % (model_name, loss_and_metrics_3))

Model is 'fft_wawelets' (177 predictors)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoc

##### Epochs=5, batch_size=128

2: 0.579

3: 0.368


##### Epochs=60, batch_size=512

2: 0.233

3: 0.368

## Original dataset (NO NEED TO RE-RUN)

In [36]:
def label_columns(df):
    l_columns = [
        'num_pso', 
        'mean_amp_pso',
        'mean_dur_pso',
        'amp_cso',
        'dur_cso',
        'curr_sleep_stage',
        'time_since_sleep',
        'time_in_ds',
        'time_in_ls',
        'time_in_rs',
        'time_in_ws',
    ]

    for i in range(12, 1261+1):
        l_columns.append('eeg_signal_%s' % (i - 12 + 1))

    df.columns = l_columns

    return df

In [47]:
import h5py

y_train = pd.read_csv("original_data/y_train.csv").values[:, 1].squeeze()
X_train = h5py.File("original_data/X_train.h5", "r")

df_train = pd.DataFrame(data=X_train["features"][:])
label_columns(df_train).head()

Unnamed: 0,num_pso,mean_amp_pso,mean_dur_pso,amp_cso,dur_cso,curr_sleep_stage,time_since_sleep,time_in_ds,time_in_ls,time_in_rs,...,eeg_signal_1241,eeg_signal_1242,eeg_signal_1243,eeg_signal_1244,eeg_signal_1245,eeg_signal_1246,eeg_signal_1247,eeg_signal_1248,eeg_signal_1249,eeg_signal_1250
0,237.0,152.658761,341.523207,128.017491,429.0,3.0,11379.0,2730.0,3780.0,0.0,...,22.707487,15.042639,9.706864,7.30548,5.344436,2.674903,-0.055816,-1.212385,-2.461937,-4.930397
1,176.0,146.883435,338.039773,119.130849,196.0,2.0,6721.0,2580.0,2100.0,0.0,...,2.839801,3.458973,4.441102,3.975107,1.00024,2.870631,7.071897,7.848365,4.033517,-2.110046
2,456.0,152.376541,335.629386,164.29258,417.0,2.0,26832.0,3240.0,7440.0,2130.0,...,27.093293,22.821611,14.196937,5.708701,-0.753271,-5.627993,-9.804085,-12.863908,-11.951175,-5.531799
3,21.0,139.720772,336.285714,159.237082,407.0,3.0,1289.0,0.0,60.0,0.0,...,28.191571,25.032295,17.296456,6.335396,-4.008689,-8.589818,-5.876062,0.166707,6.054539,12.086351
4,72.0,140.649432,349.875,130.184278,297.0,3.0,2262.0,630.0,960.0,0.0,...,-7.942308,-10.407617,-3.418883,9.222596,21.236168,28.245889,27.024864,17.794644,7.602379,-4.548318


In [48]:
df_train.to_hdf('df_train.h5', key='df_train')

## Model with original dataset

In [None]:
df_train = pd.read_hdf('df_train.h5', 'df_train')