In [8]:
import os
import time
import pickle

import numpy as np
import pandas as pd

import xgboost as xgb
from sklearn import model_selection
import sklearn.metrics as metrics

In [9]:
# Feature subsets compared in the experiments below. Every list also names the
# "label" column: prepare_dataset selects columns by these names and then reads
# the target from `label`, so it has to survive the selection.
TOP5_FEATURES = [
    "net_acc_std",
    "net_acc_max",
    "EDA_tonic_mean",
    "EDA_tonic_min",
    "EDA_tonic_max",
    "label",
]
TOP10_FEATURES = [
    "net_acc_std", "net_acc_max",
    "EDA_tonic_mean", "EDA_tonic_min", "EDA_tonic_max",
    "EDA_smna_mean", "EDA_smna_std", "EDA_smna_min", "EDA_smna_max",
    "EDA_phasic_min",
    "label",
]
TOP_EDA_FEATURES = [
    "EDA_smna_mean",
    "EDA_phasic_min",
    "EDA_tonic_mean",
    "EDA_tonic_min",
    "EDA_tonic_max",
    "label",
]

# Names for the label codes 0-2 (not referenced by the other cells shown here).
STRESS = {
    0: "amusement",
    1: "baseline",
    2: "stress",
}
# Applied by prepare_dataset after rows with label 0 are removed: the remaining
# codes 1 and 2 are remapped to the binary targets 0 and 1.
STRESS_MAP = {
    1: 0,
    2: 1,
}

In [10]:
def prepare_dataset(dataset_name, features, data_dir='../../data/03_primary',
                    test_size=0.25, random_state=42):
    """Load one preprocessed dataset and return a stratified train/test split.

    Reads ``<data_dir>/<dataset_name>/combined_subjects.csv``, keeps only the
    columns named in ``features``, drops every row whose ``label`` equals 0,
    and remaps the remaining labels through ``STRESS_MAP`` (1 -> 0, 2 -> 1).

    Parameters
    ----------
    dataset_name : str
        Sub-directory of ``data_dir`` that contains ``combined_subjects.csv``.
    features : iterable of str
        Column names to keep. Must include ``'label'``; that column is split
        off as the target and removed from the feature matrix.
    data_dir : str, optional
        Directory holding the dataset sub-directories.
    test_size : float, optional
        Fraction of rows placed in the test split.
    random_state : int, optional
        Seed forwarded to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``sklearn.model_selection.train_test_split`` (stratified on the target).

    Warns
    -----
    UserWarning
        When some requested columns are not present in the CSV. They are
        skipped, exactly as before, but no longer silently.
    """
    import warnings  # local import: only needed for the missing-column notice

    # Materialise once so a one-shot iterable can be scanned twice below.
    features = list(features)
    data = pd.read_csv(f'{data_dir}/{dataset_name}/combined_subjects.csv')

    # Index.intersection quietly ignores unknown names; surface them so a typo
    # in a feature list does not go unnoticed.
    missing = [name for name in features if name not in data.columns]
    if missing:
        warnings.warn(
            f"{dataset_name}: requested columns not found and skipped: {missing}",
            stacklevel=2,
        )

    df = data.loc[:, data.columns.intersection(features)]
    df = df[df.label != 0]
    Y_ = df.label.map(STRESS_MAP)
    X_ = df.drop(columns=['label'])
    return model_selection.train_test_split(
        X_, Y_, test_size=test_size, random_state=random_state, stratify=Y_
    )

In [11]:
def train(m, dataset_name, features):
    """Fit ``m`` on one dataset's training split and print test-set metrics.

    The split comes from ``prepare_dataset(dataset_name, features)``. The
    reported time covers fitting plus predicting on the test split; loading
    and splitting the data is not included.

    Parameters
    ----------
    m : estimator
        Object whose ``fit(X, y)`` returns something with ``predict(X)``
        (the cells below pass ``xgb.XGBClassifier`` instances).
    dataset_name : str
        Forwarded to ``prepare_dataset``.
    features : iterable of str
        Forwarded to ``prepare_dataset``.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    X, x, Y, y = prepare_dataset(dataset_name, features)

    # perf_counter is monotonic and high-resolution, so the interval cannot be
    # skewed by wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    result = m.fit(X, Y).predict(x)
    elapsed = time.perf_counter() - start_time

    print(f"Finished in: {elapsed:.2f} seconds")
    print(f"Accuracy: {metrics.accuracy_score(y, result) * 100:.2f}%")
    print(f"Balanced Accuracy: {metrics.balanced_accuracy_score(y, result) * 100:.2f}%")
    print(f"F1 Score: {metrics.f1_score(y, result, average='macro'):.2f}")
    print(f"Precision: {metrics.precision_score(y, result, average='macro'):.2f}")
    print(f"Recall: {metrics.recall_score(y, result, average='macro'):.2f}")
    # NOTE(review): R2 is a regression metric; on class labels it is hard to
    # interpret. Kept so the printed report stays the same.
    print(f"R2 Score: {metrics.r2_score(y, result):.2f}")

In [12]:
# TOP5_FEATURES on 'WESAD_preprocessed_int10_add10'; only the objective and
# the seed are set on the classifier.
train(
    xgb.XGBClassifier(objective="binary:logistic", random_state=42),
    dataset_name='WESAD_preprocessed_int10_add10',
    features=TOP5_FEATURES,
)

Finished in: 0.14 seconds
Accuracy: 94.86%
Balanced Accuracy: 94.69%
F1 Score: 0.94
Precision: 0.94
Recall: 0.95
R2 Score: 0.78


In [13]:
# TOP5_FEATURES on 'WESAD_preprocessed_int10_add15'; only the objective and
# the seed are set on the classifier.
train(
    xgb.XGBClassifier(objective="binary:logistic", random_state=42),
    dataset_name='WESAD_preprocessed_int10_add15',
    features=TOP5_FEATURES,
)

Finished in: 0.12 seconds
Accuracy: 95.60%
Balanced Accuracy: 95.24%
F1 Score: 0.95
Precision: 0.95
Recall: 0.95
R2 Score: 0.81


In [14]:
# TOP5_FEATURES on 'WESAD_preprocessed_int15_add15'.
# learning_rate is the scikit-learn wrapper's documented name for the booster's
# eta; the value (0.1) is unchanged, it just no longer travels through **kwargs.
train(
    xgb.XGBClassifier(objective="binary:logistic", learning_rate=0.1, random_state=42),
    'WESAD_preprocessed_int15_add15',
    TOP5_FEATURES,
)

Finished in: 0.10 seconds
Accuracy: 96.82%
Balanced Accuracy: 96.52%
F1 Score: 0.97
Precision: 0.97
Recall: 0.97
R2 Score: 0.86


In [15]:
# TOP10_FEATURES on 'WESAD_preprocessed_int15_add15'.
# learning_rate is the scikit-learn wrapper's documented name for the booster's
# eta; the value (0.1) is unchanged, it just no longer travels through **kwargs.
train(
    xgb.XGBClassifier(objective="binary:logistic", learning_rate=0.1, random_state=42),
    'WESAD_preprocessed_int15_add15',
    TOP10_FEATURES,
)

Finished in: 0.12 seconds
Accuracy: 98.41%
Balanced Accuracy: 98.33%
F1 Score: 0.98
Precision: 0.98
Recall: 0.98
R2 Score: 0.93


In [16]:
# TOP_EDA_FEATURES on 'WESAD_preprocessed_int15_add15'.
# learning_rate is the scikit-learn wrapper's documented name for the booster's
# eta; the value (0.1) is unchanged, it just no longer travels through **kwargs.
train(
    xgb.XGBClassifier(objective="binary:logistic", learning_rate=0.1, random_state=42),
    'WESAD_preprocessed_int15_add15',
    TOP_EDA_FEATURES,
)

Finished in: 0.10 seconds
Accuracy: 97.27%
Balanced Accuracy: 97.17%
F1 Score: 0.97
Precision: 0.97
Recall: 0.97
R2 Score: 0.88
