In [1]:
# Load necessary libraries 

In [1]:
import numpy as np
import os
import keras as ks
from keras.callbacks import Callback
from keras.optimizers import Adam
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras.models import Sequential
from keras.models import load_model
from keras import backend as K
from keras import objectives
import scipy.io as scio
import gzip
from six.moves import cPickle
import sys, random
from sklearn.model_selection import train_test_split

import math
from sklearn import mixture
from sklearn.cluster import KMeans
from keras.models import model_from_json
import json
import glob
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from scipy.ndimage import gaussian_filter
from collections import defaultdict
from scipy.ndimage import label
import warnings
from sklearn.preprocessing import MaxAbsScaler
import itertools
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, auc, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
warnings.filterwarnings("ignore")


font = {'weight' : 'bold',
        'size'   : 18}
import matplotlib
matplotlib.rc('font', **font)

from numpy import array
import keras 
from tensorflow.python.keras.utils.data_utils import Sequence
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import ConvLSTM2D
from keras.layers import Embedding
from keras.layers import Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [2]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0',
 '/job:localhost/replica:0/task:0/device:GPU:1',
 '/job:localhost/replica:0/task:0/device:GPU:2']

Select the GPU for running the rest of the model

In [3]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "GPU:1"

Creating the data generator class

In [None]:
class data_generator:
''' This class has essential functions supporting
fast and effective computation for reading the raw data, droping the irrelevant lables, 
preparing the input data and generating the batch to be used in LSTM model.'''
    
    def __init__(self, path_to_data = 'None'):
        super().__init__()
        self.path = path_to_data
        self.fileDic = self.readFiles()
        self.users = list(self.fileDic.keys())
        self.train = [key for key in self.users if key not in ['S11', 'S10', 'S4']]
        self.test = { your_key: self.fileDic[your_key] for your_key in ['S11', 'S10', 'S4']}
        self.fileDic = { your_key: self.fileDic[your_key] for your_key in self.train }
        self.users = list(self.fileDic.keys())
        self.keys = ['Resp', 'ECG', 'ACC', 'Temp', 'EDA', 'EMG']
        self.mask_data()
        self.mask_test_data()
        
    def readFiles(self):
        fileDic = {}
        files = [file for file in glob.glob('Data/*.pkl')]
        for name in tqdm(files):
            file = pickle.load(open(name, 'rb'), encoding = 'latin1')
            fileDic[name.split('.')[0].split('/')[-1]] = file
        return fileDic
    
    def mask_data(self):
        ''' This function drops the irrelevant lables of the train set for this project.
        USAGE: Mask the irrelevant features associated with irrelevant labels that are not used in this study 
        OUTPUT: New dictionary without the irrelevant features and labels for the train set'''
        for user in self.users:
            mask = np.isin(self.fileDic[user]['label'], [0,4,5,6,7], invert=True)
            self.fileDic[user]['label'] = self.fileDic[user]['label'][mask]
            self.fileDic[user]['label'] = self.fileDic[user]['label'] - 1
            self.fileDic[user]['label'] = keras.utils.to_categorical(self.fileDic[user]['label'],
                                                                                      num_classes=3, dtype='float32')
            for key in self.keys:
                self.fileDic[user]['signal']['chest'][key] = self.fileDic[user]['signal']['chest'][key][mask]
    
    def mask_test_data(self):
        ''' This function drops the irrelevant lables of the test set for this project.
        USAGE: Mask the irrelevant features associated with irrelevant labels that are not used in this study 
        OUTPUT: New dictionary without the irrelevant features and labels for the test set'''
        for user in self.test:
            mask = np.isin(self.test[user]['label'], [0,4,5,6,7], invert=True)
            self.test[user]['label'] = self.test[user]['label'][mask]
            self.test[user]['label'] = self.test[user]['label'] - 1
            self.test[user]['label'] = keras.utils.to_categorical(self.test[user]['label'], num_classes=3, dtype='float32')
            for key in self.keys:
                self.test[user]['signal']['chest'][key] = self.test[user]['signal']['chest'][key][mask]   
    
    def create_data(self, user, N_samples):
        ''' This function creates and stacks the time series data for the train set.
        USAGE: Create the train dataset
        ARGS: user = string (patient ID), N_samples = Numeric represnting the number of samples for picking the records
        OUTPUT: x and y as features and labels'''
        length = self.fileDic[user]['signal']['chest']['ECG'].shape[0]
        max_interval = length//N_samples
        i = np.random.choice(max_interval - 1, 1, replace=True)[0]
        
        x = [np.hstack(self.fileDic[user]['signal']['chest']['ECG'][i*N_samples:(i+1)*N_samples]), 
            np.hstack(self.fileDic[user]['signal']['chest']['EMG'][i*N_samples:(i+1)*N_samples]),
            np.hstack(self.fileDic[user]['signal']['chest']['Temp'][i*N_samples:(i+1)*N_samples]),
            np.hstack(self.fileDic[user]['signal']['chest']['Resp'][i*N_samples:(i+1)*N_samples]),
            np.hstack(self.fileDic[user]['signal']['chest']['EDA'][i*N_samples:(i+1)*N_samples]),
            np.hstack(self.fileDic[user]['signal']['chest']['ACC'][i*N_samples:(i+1)*N_samples, 0]),
            np.hstack(self.fileDic[user]['signal']['chest']['ACC'][i*N_samples:(i+1)*N_samples, 1]),
            np.hstack(self.fileDic[user]['signal']['chest']['ACC'][i*N_samples:(i+1)*N_samples, 2])]

        y = self.fileDic[user]['label'][i*N_samples:(i+1)*N_samples]
        return x, y[int(N_samples/2) + 2]   
        
    
    def create_test_data(self, user, N_samples = 256):
        ''' This function creates and stacks the time series data for the test set.
        USAGE: Create the test dataset
        ARGS: user = string (patient ID), N_samples = Numeric represnting the number of samples for picking the records
        OUTPUT: X and Y as features and labels'''
        length = self.test[user]['signal']['chest']['ECG'].shape[0]
        max_interval = length//N_samples
        i = np.random.choice(max_interval - 1, 1, replace=True)[0]
        X = []
        Y = []
        for i in range(max_interval):
            X.append([np.hstack(self.test[user]['signal']['chest']['ECG'][i*N_samples:(i+1)*N_samples]), 
                np.hstack(self.test[user]['signal']['chest']['EMG'][i*N_samples:(i+1)*N_samples]),
                np.hstack(self.test[user]['signal']['chest']['Temp'][i*N_samples:(i+1)*N_samples]),
                np.hstack(self.test[user]['signal']['chest']['Resp'][i*N_samples:(i+1)*N_samples]),
                np.hstack(self.test[user]['signal']['chest']['EDA'][i*N_samples:(i+1)*N_samples]),
                np.hstack(self.test[user]['signal']['chest']['ACC'][i*N_samples:(i+1)*N_samples, 0]),
                np.hstack(self.test[user]['signal']['chest']['ACC'][i*N_samples:(i+1)*N_samples, 1]),
                np.hstack(self.test[user]['signal']['chest']['ACC'][i*N_samples:(i+1)*N_samples, 2])])

            Y.append(self.test[user]['label'][i*N_samples:(i+1)*N_samples][int(N_samples/2) + 2])
            
        return X, Y


    def batch_generator_train(self, batch_size = 8, N_samples=256):
        ''' This function generates the batch for the train set.
        USAGE: Generate the train batch 
        ARGS: batch_size = Numeric representing the batch size (number of patients), N_samples = Numeric represnting the number of samples for picking the records
        OUTPUT: Train batch for X and Y'''
        while True:
            # create the indicies
            self.batch_indices_tr = np.random.choice(len(self.users), batch_size, replace=True)  
            users_to_pick = [self.users[i] for i in self.batch_indices_tr]
            
            batch_tr = np.array([self.create_data(user, N_samples) for user in users_to_pick])
            
            batch_tr_X = np.array([batch_tr[i][0] for i in range(len(batch_tr))])
            batch_tr_Y = np.vstack([batch_tr[i][1] for i in range(len(batch_tr))])
            # yield the data
            
            yield (batch_tr_X, batch_tr_Y)
                        
    def batch_generator_validation(self, batch_size = 4, N_samples=256):
        ''' This function generates the batch for the validation set.
        USAGE: Generate the validation batch 
        ARGS: batch_size = Numeric representing the batch size (number of patients), N_samples = Numeric represnting the number of samples for picking the records
        OUTPUT: Validation batch for X and Y'''
        while True:
            # create the indicies
            self.batch_indices_tr = np.random.choice(len(self.users), batch_size, replace=True)  
            users_to_pick = [self.users[i] for i in self.batch_indices_tr]
            
            batch_tr = np.array([self.create_data(user, N_samples) for user in users_to_pick])
            
            batch_tr_X = np.array([batch_tr[i][0] for i in range(len(batch_tr))])
            batch_tr_Y = np.vstack([batch_tr[i][1] for i in range(len(batch_tr))])
            
            # yield the data
            yield (batch_tr_X, batch_tr_Y)
    
    
    def batch_generator_test(self, batch_size = 8, N_samples=256):
        ''' This function generates the batch for the test set.
        USAGE: Generate the test batch 
        ARGS: batch_size = Numeric representing the batch size (number of patients), N_samples = Numeric represnting the number of samples for picking the records
        OUTPUT: Test batch for X and Y'''
        while True:

            batch_test = np.array([self.create_test_data(user, N_samples) for user in data_gen.test.keys()])

#             batch_test_X = np.array([batch_test[i][0] for i in range(len(batch_test))])
#             batch_test_Y = np.vstack([batch_test[i][1] for i in range(len(batch_test))])
            # yield the data

            return batch_test
            

In [5]:
data_gen = data_generator()

100%|██████████| 15/15 [00:31<00:00,  2.00s/it]


In [7]:
# https://machinelearningmastery.com/how-to-develop-rnn-models-for-human-activity-recognition-time-series-classification/

### Find model with Keras - to run LSTM with batch generator

In [6]:
x, y = next(data_gen.batch_generator_train(batch_size=8, N_samples=256))
n_timesteps, n_features, n_outputs = x.shape[1], x.shape[2], y.shape[1]

In [7]:
n_timesteps

8

In [8]:
model = Sequential()
model.add(LSTM(256, input_shape=(n_timesteps,n_features)))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
#model.add(LSTM(256, activation = 'relu'))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(3, activation='softmax'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
__________

Train and find the best model

In [10]:
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=70, verbose = 0, mode = 'min')
checkpoint_callback = ModelCheckpoint('keras_checkpoint_1.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')


In [12]:
model.fit_generator(data_gen.batch_generator_train(batch_size=8, N_samples=256), steps_per_epoch = 32, 
                    verbose=1, validation_data=data_gen.batch_generator_validation(batch_size=4, N_samples=256), validation_steps = 64,
                   epochs=200, callbacks=[early_stopping_callback, checkpoint_callback])

Epoch 1/200

Epoch 00001: val_loss improved from inf to 0.53390, saving model to keras_checkpoint_1.h5
Epoch 2/200

Epoch 00002: val_loss did not improve from 0.53390
Epoch 3/200

Epoch 00003: val_loss did not improve from 0.53390
Epoch 4/200

Epoch 00004: val_loss did not improve from 0.53390
Epoch 5/200

Epoch 00005: val_loss did not improve from 0.53390
Epoch 6/200

Epoch 00006: val_loss did not improve from 0.53390
Epoch 7/200

Epoch 00007: val_loss improved from 0.53390 to 0.51348, saving model to keras_checkpoint_1.h5
Epoch 8/200

Epoch 00008: val_loss did not improve from 0.51348
Epoch 9/200

Epoch 00009: val_loss did not improve from 0.51348
Epoch 10/200

Epoch 00010: val_loss improved from 0.51348 to 0.49407, saving model to keras_checkpoint_1.h5
Epoch 11/200

Epoch 00011: val_loss did not improve from 0.49407
Epoch 12/200

Epoch 00012: val_loss did not improve from 0.49407
Epoch 13/200

Epoch 00013: val_loss did not improve from 0.49407
Epoch 14/200

Epoch 00014: val_loss did

<keras.callbacks.History at 0x7f1654225f28>

In [11]:
model.load_weights('keras_checkpoint.h5')

In [12]:
data_gen.test.keys()

dict_keys(['S11', 'S10', 'S4'])

In [13]:
x_test = data_gen.batch_generator_test(batch_size=8, N_samples=256)

In [14]:
test_X = [x[0] for x in x_test]
test_Y = [x[1] for x in x_test]
test_Y = np.vstack(test_Y)
test_X = np.vstack(test_X)

In [15]:
prediction = model.predict(test_X)
errors = abs(prediction - test_Y)
mape = 100 * np.mean(errors / test_Y)
accuracy = 100 - mape

In [21]:
errors

array([[0.07798702, 0.07036364, 0.0076233 ],
       [0.04094309, 0.03955093, 0.00139223],
       [0.11538738, 0.09370087, 0.02168656],
       ...,
       [0.09596918, 0.28173316, 0.18576396],
       [0.18640466, 0.28467286, 0.09826812],
       [0.08424709, 0.16128683, 0.07703976]], dtype=float32)

In [16]:
print('LSTM Model Performance: ')
print('Accuracy = {:0.2f}.'.format(accuracy_score(test_Y.argmax(axis=1), prediction.argmax(axis=1))))
print(classification_report(test_Y.argmax(axis=1), prediction.argmax(axis=1), target_names = ['Neutral', 'Amused', 'Stress']))

LSTM Model Performance: 
Accuracy = 0.80.
              precision    recall  f1-score   support

     Neutral       0.76      0.97      0.85      9620
      Amused       0.93      0.63      0.75      5576
      Stress       0.80      0.59      0.68      3041

   micro avg       0.80      0.80      0.80     18237
   macro avg       0.83      0.73      0.76     18237
weighted avg       0.82      0.80      0.79     18237



In [17]:
print(confusion_matrix(test_Y.argmax(axis=1), prediction.argmax(axis=1)))

[[9313  197  110]
 [1726 3511  339]
 [1186   48 1807]]
