In [1]:
import glob
import os
import pickle
from datetime import datetime

import matplotlib.pyplot as plt
import numpy
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [3]:
def readFiles():
    """Load every pickled signal file and group the contents by user.

    Files are discovered with the glob pattern
    ``transformed_data_updated/*.pickle``. Each file name (directory and
    extension removed) is split on ``_``: token 0 is the user id and token 2
    is the signal name used as the inner dictionary key (the keys that
    ``assemble_features`` looks up, such as ``'ECG'`` or ``'Temp'``).

    Returns:
        dict: ``{user: {signal: unpickled_object}}``. Users are inserted in
        sorted order so downstream row order is reproducible between runs.

    Raises:
        ValueError: if a file name has fewer than three ``_``-separated tokens.
    """
    # Group paths by the user parsed from the file NAME. The previous
    # substring test (`user in file`) matched against the whole path, so a
    # user such as 'S1' also collected the files of 'S10', 'S11', ...
    files_by_user = {}
    for file in sorted(glob.glob('transformed_data_updated/*.pickle')):
        user = os.path.basename(file).split('_')[0]
        files_by_user.setdefault(user, []).append(file)

    fileDic = {}
    for user in tqdm(sorted(files_by_user)):
        fileDic[user] = {}
        for file in files_by_user[user]:
            # Tokenise the base name only: the directory name itself contains
            # underscores, so splitting the full path shifts every index.
            tokens = os.path.splitext(os.path.basename(file))[0].split('_')
            if len(tokens) < 3:
                raise ValueError(
                    'Cannot parse signal name from file name: {!r}'.format(file))
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file; only run this on data files you trust.
            with open(file, 'rb') as handle:
                fileDic[user][tokens[2]] = pickle.load(handle, encoding='latin1')

    return fileDic

In [4]:
def assemble_features(fileDic):
    """Separate the features and the labels of the loaded data.

    For every user in ``fileDic`` (in the dictionary's iteration order) one
    row per sample is built from 15 columns: the ``'x'``, ``'x1'`` and
    ``'x2'`` entries (sub-key ``'0'``) of the Temp, ECG, EDA, EMG and Resp
    signals, in that order. Labels are read from ``fileDic[user]['ECG']['y']``.
    The per-user blocks are stacked vertically.

    The ACC signal (sub-keys '0', '1', '2' of its 'x', 'x1', 'x2' entries) is
    deliberately left out of the feature matrix.

    Args:
        fileDic: dictionary ``{user: {signal: data}}`` representing the data.

    Returns:
        tuple: ``(X, y)`` NumPy arrays with the stacked features and labels.
        For an empty ``fileDic`` two empty arrays are returned.
    """
    sensors = ('Temp', 'ECG', 'EDA', 'EMG', 'Resp')
    feature_keys = ('x', 'x1', 'x2')

    # Collect per-user blocks and stack once at the end. The previous
    # `if X == []` test compared an ndarray with a list (elementwise, which
    # warns or raises on recent NumPy) and re-stacked inside the loop.
    X_parts = []
    y_parts = []
    for k in fileDic:
        patient = fileDic[k]
        columns = [patient[sensor][key]['0']
                   for sensor in sensors
                   for key in feature_keys]
        # zip pairs the i-th value of every column into one sample row.
        X_parts.append(np.array(list(zip(*columns))))
        y_parts.append(np.array(patient['ECG']['y']))

    if not X_parts:
        return np.empty((0, len(sensors) * len(feature_keys))), np.empty((0, 1))

    # A single user is returned as-is so its label array keeps its own shape.
    X = X_parts[0] if len(X_parts) == 1 else np.vstack(X_parts)
    y = y_parts[0] if len(y_parts) == 1 else np.vstack(y_parts)
    return X, y

In [5]:
# Load the per-user signal dictionaries, then build the feature matrix and
# the labels. X and y are used by every later cell, so they are assigned here
# to keep the notebook runnable from a fresh kernel.
fileDic = readFiles()
X, y = assemble_features(fileDic)


100%|██████████| 14/14 [00:00<00:00, 137.52it/s]


In [64]:
# Dimensions of the feature matrix (samples, feature columns).
X.shape

(7034, 15)

In [67]:
# Dimensions of the label array; the row count should match X.
y.shape

(7034, 1)

In [174]:
from numpy import argmax
from keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm

In [108]:
# One-hot encode the labels into 3 integer columns. The labels are shifted
# down by one first -- presumably they are stored as 1..3; verify against the data.
labels_zero_based = y - 1
y_encoded = to_categorical(labels_zero_based, num_classes=3, dtype='int32')

In [111]:
# Min-max scale every feature column of X.
# NOTE(review): the scaler is fitted on all rows before any train/test split,
# so the test rows influence the scaling range -- confirm this is acceptable.
scaler = MinMaxScaler()
X_transformed = scaler.fit(X).transform(X)

In [112]:
# Display the scaled feature matrix for a quick visual check.
X_transformed

array([[0.92875401, 0.07144002, 0.92875401, ..., 0.472567  , 0.61060596,
        0.472567  ],
       [0.91229477, 0.08556084, 0.91229477, ..., 0.4893423 , 0.59969977,
        0.4893423 ],
       [0.8799561 , 0.11455949, 0.8799561 , ..., 0.52824914, 0.57163324,
        0.52824914],
       ...,
       [0.86065107, 0.13837348, 0.86065107, ..., 0.80448834, 0.26701961,
        0.80448834],
       [0.84373979, 0.15550646, 0.84373979, ..., 0.80221515, 0.270638  ,
        0.80221515],
       [0.83936842, 0.15922627, 0.83936842, ..., 0.77939649, 0.29824498,
        0.77939649]])

Implementing the Random Forest Classifier 

In [151]:
# Hold out 30% of the rows, then fit a random forest on the one-hot labels.
# NOTE(review): with a 2-D one-hot target the forest is fitted per output
# column rather than as a single multiclass problem -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed,
    y_encoded,
    test_size=0.3,
    random_state=123,
)
clf = RandomForestClassifier(random_state=0, max_depth=25, n_estimators=45)
clf.fit(X_train, y_train)
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [133]:
# Random-forest predictions for the held-out rows.
prediction = clf.predict(X_test)

In [152]:
# Mean accuracy of the random forest on the held-out rows.
clf.score(X_test, y_test)

0.9384178114637612

In [130]:
# Accuracy of the stored `prediction` array against the held-out labels.
# NOTE(review): the execution counts show this cell ran before the classifier
# cell was last re-run, so the displayed value may be stale.
accuracy_score(y_test,prediction)

0.935101847465656

In [138]:
# F1 computed per sample row and then averaged (the 'samples' averaging mode).
f1_score(y_test,prediction,average = 'samples')

  'precision', 'predicted', average, warn_for)


0.9346281383230697

Implementing the AdaBoost Classifier

In [233]:
# AdaBoost (SAMME) with `clf` as the base estimator, trained on the original
# class labels. The labels are flattened to 1-D before splitting: passing the
# (n, 1) column vector made scikit-learn warn in column_or_1d on every fit
# that uses y_train_ada. The split is otherwise unchanged.
X_train_ada, X_test_ada, y_train_ada, y_test_ada = train_test_split(
    X_transformed, np.ravel(y), test_size=0.3, random_state=123
)
adaBclf = AdaBoostClassifier(clf, n_estimators=80, learning_rate=1, algorithm='SAMME')
adaBclf.fit(X_train_ada, y_train_ada)
adaBclf.score(X_test_ada, y_test_ada, sample_weight=None)

  y = column_or_1d(y, warn=True)


0.949786830885836

In [164]:
# AdaBoost predictions for the held-out rows.
ada_prediction = adaBclf.predict(X_test_ada)

In [172]:
# Mean accuracy of the AdaBoost ensemble on the held-out rows.
adaBclf.score(X_test_ada, y_test_ada)

0.9545239223117006

Implementing the SVM Classifier

In [234]:
# RBF-kernel SVM trained and scored on the same split as the AdaBoost model.
clfSVM = svm.SVC(kernel='rbf', C=80, gamma=10, decision_function_shape='ovo')
clfSVM.fit(X_train_ada, y_train_ada)
clfSVM.score(X_test_ada, y_test_ada)

  y = column_or_1d(y, warn=True)


0.9767882520132638