# PSYC0021: Tutorial to Build an Automatic Exertion Detection Algorithm Using PPG Features

## Import packages

In [None]:
%matplotlib inline

import os
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option("display.precision", 2)

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

from utils.process_ppg import load_dataframe

In [None]:
# Directory that holds the tutorial's input files; file names are joined onto it.
basepath = "data/"

## Load the features
Check the dataframe

In [None]:
# Load the pickled feature file through the project helper. It returns two
# objects; only `ppg_df` is used in the cells of this notebook that follow.
# NOTE(review): what `df` contains is defined by utils.process_ppg -- confirm there.
df, ppg_df = load_dataframe(
    os.path.join(basepath, 'PPG_features.pkl'),
)
# Bare expression: lets the notebook render the dataframe as the cell output.
ppg_df

### Drop unneeded columns from the dataframe (e.g. sub_id)

In [None]:
# Remove the 'sub_id' and 's' columns so they are not carried into the
# feature table used for plotting and training.
ppg_df = ppg_df.drop(['sub_id', 's'], axis=1)
# Preview the first rows of the trimmed table.
ppg_df.head()

## Inspect features

### Boxplot using all features

In [None]:
# Enlarge seaborn's font scaling so labels stay legible on the large figure.
sns.set(font_scale=2)

# Feature columns to plot, one subplot each (12 features -> 3 x 4 grid).
params = ['bpm', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50',
          'hr_mad', 'sd1', 'sd2', 'sd1/sd2', 'breathingrate']

# The x axis (exertion level) is shared; each feature keeps its own y scale.
fig, ax = plt.subplots(3, 4, figsize=(20, 15), sharex=True, sharey=False)
fig.subplots_adjust(wspace=0.5, hspace=0.5)

# Draw each boxplot on the axes object returned by plt.subplots instead of
# re-selecting the axes through pyplot's implicit "current axes" state.
for axis, param in zip(ax.flat, params):
    sns.boxplot(x="exertion_level", y=param, data=ppg_df, ax=axis)
plt.show()

## Prepare the data: transform the labels into numbers so they are readable by the ML algorithm

In [None]:
# Encode every distinct exertion level as an integer code.
#   targets       -> one integer code per row of ppg_df
#   targets_index -> the distinct labels; targets_index[code] gives back the label
targets, targets_index = ppg_df['exertion_level'].factorize()
print(targets)
print(targets_index)

## Split data into training and testing sets (approximately 65% vs. 35%)

In [None]:
# Feature columns: everything after the first column.
# NOTE(review): assumes the first column is the 'exertion_level' label -- confirm.
# 'is_train' is excluded explicitly so that re-running this cell (when the flag
# column already exists) does not hand the split flag to the classifier.
features = ppg_df.columns[1:].drop('is_train', errors='ignore')

# Seeded generator so the split is identical on every run, matching the fixed
# random_state used for the classifier. Each row independently lands in the
# training set with probability 0.65, so the 65/35 ratio is approximate.
rng = np.random.default_rng(0)
ppg_df['is_train'] = rng.uniform(0, 1, len(ppg_df)) <= .65

# Compute the boolean mask once and reuse it for both the rows and the labels.
train_mask = ppg_df['is_train'].to_numpy()

# Create two new dataframes, one with the training rows, one with the test rows
train, test = ppg_df[train_mask], ppg_df[~train_mask]
train_target = targets[train_mask]
test_target = targets[~train_mask]
# NOTE(review): this is a row-wise split made after 'sub_id' was dropped, so
# rows from the same subject can end up in both sets -- confirm this is intended.

## Train Random Forest Classifier

In [None]:
# Build the random forest: fixed seed for repeatable trees, two parallel jobs.
clf = RandomForestClassifier(random_state=0, n_jobs=2)

# Fit on the training rows (feature columns only) against their integer labels.
clf.fit(X=train[features], y=train_target)

## Evaluate the trained classifier on test data

In [None]:
# Predicted class (integer code) for every held-out row.
test_pred = clf.predict(X=test[features])
# Per-class probability estimates for the same rows.
test_pred_prob = clf.predict_proba(X=test[features])

### Confusion Matrix

In [None]:
# Class names in code order: targets_index[i] is the label that pd.factorize
# encoded as the integer i, so these names line up with the integer targets.
class_names = [str(name) for name in targets_index]
class_codes = np.arange(len(class_names))
# NOTE(review): pd.factorize encodes missing labels as -1; such rows would not
# be counted below because -1 is not in class_codes -- confirm there are none.

# Per-class precision / recall / F1, reported with the real class names.
print(classification_report(test_target, test_pred,
                            labels=class_codes, target_names=class_names))

# Generate Confusion Matrix (raw counts). Passing `labels` pins the row/column
# order and keeps the shape fixed even if a class is absent from the test split.
cm = confusion_matrix(test_target, test_pred, labels=class_codes, normalize=None)

fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, ax=ax, fmt='g',  # annot=True to annotate cells
            xticklabels=class_names, yticklabels=class_names)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()