# Relax! This is only a game
A person under stress can affect their performance. Our interest lays in stress release video games that can reduce the abnormally high stress level. 

This project uses ECG signals to determine the stress level of a person before and after playing the stress release video games. To demonstrate the algorithm, different stress and non-stress ECG signals were used as input to the algorithm. After feature extraction, the features were fed into different statistical machine learning algorithms. The algorithms tested include Navie Bayes, Decision Tree and Gradient Boost. Among these algorithm Gradient Boost provided the highest accuracy. 

In [51]:
# Importing packages
import pandas as pd
import numpy as np
from numpy import set_printoptions

# Preparing dataset
from sklearn.preprocessing import MinMaxScaler
import random

# Models
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB

# Cross Validation
from sklearn.model_selection import KFold

# Performance Measure
#from sklearn.model_selection import cross_val_score
#from sklearn.metrics import confusion_matrix
#from sklearn.utils.multiclass import unique_labels
from sklearn import metrics
import statistics

# Plotting
import matplotlib.pyplot as plt

## Preparing the Dataset
Loading the feature csv file into arrays and normalized the features to range of [0,1]. There are 49 signals in total with 21 stress signals and 28 non-stress signals

In [52]:
# load the feature files into panda with X contains the features and Y contains the corresponding labels

filename = '~binhn/Downloads/stress_analysis.csv'
names = ['Mean HR','SDNN','RMSSD','NN50','PNN50','SD1','SD2','ApEn',
         'VLF(ms^2)','LF(ms^2)','HF(ms^2)','LF/HF ratio','TP',
         'VLF_lomb(ms^2)','LF_lomb(ms^2)','HF_lomb(ms^2)','LF/HF_lomb ratio','TP_lomb','Stress']
#dataframe = pd.read_csv(filename, names =names)
df = pd.read_csv(filename, names=names)
print(df.head(5))

array = df.values
X = np.array(df.iloc[:,0:18])
y = np.array(df.iloc[:,18])
y=y.reshape(-1,1)



   Mean HR      SDNN     RMSSD  NN50    PNN50       SD1       SD2     ApEn  \
0   84.760  0.053441  0.031241    21  0.20243  0.022112  0.072271  0.30596   
1   73.411  0.072690  0.083187   124  0.14045  0.058863  0.084277  0.69844   
2   76.055  0.160820  0.211560   148  0.25641  0.149790  0.171150  0.87521   
3   74.226  0.084564  0.102010    87  0.24691  0.072218  0.095325  0.75760   
4  123.880  0.088389  0.110670    60  0.16639  0.078318  0.097424  0.80389   

   VLF(ms^2)  LF(ms^2)  HF(ms^2)  LF/HF ratio       TP  VLF_lomb(ms^2)  \
0     12.406    3.9326    12.433      0.31630   30.541          400.21   
1    170.560   77.1350    48.860      1.57870  324.600         2078.80   
2    171.860  140.6900   235.750      0.59677  558.590         2899.80   
3    141.100   73.2180    66.160      1.10670  321.040         2123.20   
4    272.550   80.8040   166.750      0.48459  578.670          911.47   

   LF_lomb(ms^2)  HF_lomb(ms^2)  LF/HF_lomb ratio  TP_lomb  Stress  
0         846.71 

In [53]:
# Normalizing the features into [0,1]
scaler = MinMaxScaler(feature_range = (0,1)) # scale the values to min 0, max 1
rescaledX = np.array(scaler.fit_transform(X)) # fit the trainning feature X into scaler

set_printoptions(precision=3) # how many decimal places.
print(rescaledX[0:5,:])



[[1.196e-01 1.906e-02 8.005e-03 3.125e-02 2.706e-01 8.005e-03 2.523e-02
  2.214e-03 1.925e-05 5.443e-06 3.416e-05 6.674e-02 1.642e-05 1.961e-03
  3.140e-03 3.528e-03 3.235e-01 3.756e-03]
 [7.900e-02 3.263e-02 3.440e-02 2.788e-01 8.984e-02 3.439e-02 3.352e-02
  5.225e-01 2.691e-04 1.105e-04 1.348e-04 5.928e-01 1.771e-04 1.167e-02
  1.245e-02 2.703e-02 1.629e-01 1.730e-02]
 [8.845e-02 9.475e-02 9.962e-02 3.365e-01 4.281e-01 9.967e-02 9.346e-02
  7.569e-01 2.712e-04 2.017e-04 6.511e-04 1.836e-01 3.050e-04 1.641e-02
  1.098e-02 1.472e-02 2.929e-01 1.667e-02]
 [8.191e-02 4.100e-02 4.396e-02 1.899e-01 4.004e-01 4.398e-02 4.114e-02
  6.010e-01 2.226e-04 1.048e-04 1.826e-04 3.961e-01 1.752e-04 1.192e-02
  9.496e-03 2.698e-02 1.120e-01 1.614e-02]
 [2.594e-01 4.369e-02 4.836e-02 1.250e-01 1.655e-01 4.836e-02 4.259e-02
  6.623e-01 4.302e-04 1.157e-04 4.604e-04 1.369e-01 3.159e-04 4.917e-03
  1.119e-02 4.622e-02 5.889e-02 2.541e-02]]


## Classification 
The classifiers tested in this project include Decision Tree, Each classifier uses 5-fold for validation.

In [54]:
# Classifiers
# Decision Tree
DT = DecisionTreeClassifier(random_state=0)
# Gradient Boost
GB = GradientBoostingClassifier(n_estimators=100, 
                                learning_rate=1.0, 
                                max_depth=1, 
                                random_state=0)

# Support Vector Machine
SVM = SVC(random_state = 0, gamma='scale',probability = True)

# Linear Discrimant Analysis
LDA = LinearDiscriminantAnalysis()

# Gaussian Naive Bayes
GNB = GaussianNB()


In [70]:
# Classificaiton function with K-Fold cross-validation with user define iterations to get the standard deviation 

def classify(model, Iterations, num_folds, rescaledX, y):
    acc = []
    pre = []
    rec = []
    rocauc = []
    Skip = False

    for i in range(Iterations):

        # Shuffle the dataset for each iteration
        data = list(zip(rescaledX,y))
        random.shuffle(data)
        rescaledX, y = zip(*data)
        rescaledX = np.array(rescaledX)
        y = np.array(y)
        #y = y.reshape(-1,1)

        # Perform 5-fold validation
        kfold = KFold(n_splits = num_folds)#, shuffle=True, random_state = None)
        #results = cross_val_score(DT, rescaledX, y, cv = kfold)
        #print(results)

        for train_index, test_index in kfold.split(rescaledX):
            X_train, X_test = rescaledX[train_index], rescaledX[test_index]
            y_train, y_test = y[train_index], y[test_index]

            y_total = sum(y_test)
            if (y_total == 0) or (y_total == len(y_test)):
                Skip = True
                
            if not Skip:
                # perform training and testing
                if model == SVC:
                    model.fit(X_train,y_train)
                else:
                    model.fit(X_train,y_train.ravel())
                #dtscores = DT.score(X_test,y_test)
                yPred = model.predict(X_test)


                # record performance
                acc = np.append(acc,metrics.accuracy_score(y_test, yPred)) 
                pre = np.append(pre,metrics.precision_score(y_test,
                                                            yPred, 
                                                            pos_label=1, 
                                                            average='macro', 
                                                            labels=np.unique(yPred)))
                rec = np.append(rec,metrics.recall_score(y_test,
                                                         yPred, 
                                                         pos_label=1, 
                                                         average='macro', 
                                                         labels=np.unique(yPred)))
                rocauc = np.append(rocauc, metrics.roc_auc_score(y_test, 
                                                                 yPred,
                                                                 average='macro'))
    return acc, pre, rec, rocauc
    

    

### Parameter setting
The classification are set to 100 iteration using 5-fold cross validation.

NOTE: You might encounter single predicted labels instead of 2 class labels. This will return an error. The easiest work around is to run the cell again to shuffle tthe data. 

In [56]:
Iterations = 100
num_folds = 5


### Linear Discrimant Analysis

This section classifies the features using a Linear Discriminant Analysis (LDA) classifier.


From the warning of collinear variables, a more meaning results maybe obtained by first using features selection techniques to remove dependent variables.


In [71]:
print('Support Vector Machine')
acc, pre, rec, rocauc = classify(LDA, Iterations, num_folds,  rescaledX, y)
print('Accuracy (mean, std):', statistics.mean(acc), statistics.stdev(acc))
print('Precision (mean, std):', statistics.mean(pre), statistics.stdev(pre))
print('Recall (mean, std):', statistics.mean(rec), statistics.stdev(pre))
print('Area under the Receiver Operating Characteristic Curve', statistics.mean(rocauc), statistics.stdev(rocauc))


Support Vector Machine
Accuracy (mean, std): 0.6702020202020202 0.10104604939923673
Precision (mean, std): 0.6784361471861472 0.13562548461005922
Recall (mean, std): 0.6504599567099567 0.13562548461005922
Area under the Receiver Operating Characteristic Curve 0.6504599567099567 0.12400042133484585




### Decision Tree


In [72]:
print('Decision Tree')
acc, pre, rec, rocauc = classify(DT, Iterations, num_folds,  rescaledX, y)
print('Accuracy (mean, std):', statistics.mean(acc), statistics.stdev(acc))
print('Precision (mean, std):', statistics.mean(pre), statistics.stdev(pre))
print('Recall (mean, std):', statistics.mean(rec), statistics.stdev(pre))
print('Area under the Receiver Operating Characteristic Curve', statistics.mean(rocauc), statistics.stdev(rocauc))
            

Decision Tree
Accuracy (mean, std): 0.719164118246687 0.16612994208767567
Precision (mean, std): 0.7263761467889909 0.17796915359502816
Recall (mean, std): 0.7230176933158584 0.17796915359502816
Area under the Receiver Operating Characteristic Curve 0.7184305373525557 0.17631877446410993


### Support Vector Machine

In [73]:
print('Support Vector Machine')
acc, pre, rec, rocauc = classify(SVM, Iterations, num_folds,  rescaledX, y)
print('Accuracy (mean, std):', statistics.mean(acc), statistics.stdev(acc))
print('Precision (mean, std):', statistics.mean(pre), statistics.stdev(pre))
print('Recall (mean, std):', statistics.mean(rec), statistics.stdev(pre))
print('Area under the Receiver Operating Characteristic Curve', statistics.mean(rocauc), statistics.stdev(rocauc))

Support Vector Machine
Accuracy (mean, std): 0.7591522157996147 0.11797441377783108
Precision (mean, std): 0.8028012891090925 0.12347804484293638
Recall (mean, std): 0.744481718506285 0.12347804484293638
Area under the Receiver Operating Characteristic Curve 0.7372562849802734 0.12353236250486767


### Gradient Boosting

In [75]:
print('Gradient Boosting')
acc, pre, rec, rocauc = classify(GB, Iterations, num_folds,  rescaledX, y)
print('Accuracy (mean, std):', statistics.mean(acc), statistics.stdev(acc))
print('Precision (mean, std):', statistics.mean(pre), statistics.stdev(pre))
print('Recall (mean, std):', statistics.mean(rec), statistics.stdev(pre))
print('Area under the Receiver Operating Characteristic Curve', statistics.mean(rocauc), statistics.stdev(rocauc))

Gradient Boosting
Accuracy (mean, std): 0.8496222222222223 0.14260464582027202
Precision (mean, std): 0.8595706349206349 0.14118902637414696
Recall (mean, std): 0.8500333333333333 0.14118902637414696
Area under the Receiver Operating Characteristic Curve 0.8470333333333333 0.1442871082170833


### Gaussian Naive Bayes

In [76]:
print('Gaussian Naive Bayes')
acc, pre, rec, rocauc = classify(GNB, Iterations, num_folds,  rescaledX, y)
print('Accuracy (mean, std):', statistics.mean(acc), statistics.stdev(acc))
print('Precision (mean, std):', statistics.mean(pre), statistics.stdev(pre))
print('Recall (mean, std):', statistics.mean(rec), statistics.stdev(pre))
print('Area under the Receiver Operating Characteristic Curve', statistics.mean(rocauc), statistics.stdev(rocauc))

Gaussian Naive Bayes
Accuracy (mean, std): 0.6433555555555556 0.14638336741099803
Precision (mean, std): 0.6625242063492064 0.18695766868172312
Recall (mean, std): 0.6815710317460317 0.18695766868172312
Area under the Receiver Operating Characteristic Curve 0.5995710317460318 0.13875951327728075


### Not tested Classifiers

In [77]:
# Extra Trees
clf = ExtraTreesClassifier(n_estimators=10,
                           max_depth=None,
                           min_samples_split=2, 
                           random_state=0)
# AdaBoost
clf2 = AdaBoostClassifier(n_estimators=100)

# KNN
knn = KNeighborsClassifier()

NameError: name 'ExtraTreesClassifier' is not defined