### Can we classify each phase as relatively low or high anxiety for each subject? ###

In [2]:
# IMPORTING MODULES
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import sys

import pandas as pd
import scipy.signal as ss

# Absolute paths to the cvxEDA `src` directory and the project's `src`
# directory, both resolved two levels above the current working directory.
# NOTE(review): cvx_path is computed but never added to sys.path in this
# cell -- confirm whether it is needed elsewhere.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))

# `sys` has to be imported before this point (it used to be imported only
# afterwards, which raises NameError on a fresh kernel). The membership check
# keeps repeated runs of the cell from appending the same entry again.
if module_path not in sys.path:
    sys.path.append(module_path)

import tools.data_reader_wesad as dr
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

# Switch off cvxopt's solver progress output.
import cvxopt.solvers
cvxopt.solvers.options["show_progress"] = False

# Hide RuntimeWarning messages for the rest of the session.
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


# Phase ordering as defined by the WESAD data reader.
phases = dr.Phases.PHASE_ORDER


In [9]:
# Re-execute the project modules so that edits made on disk take effect
# without restarting the kernel. The helper modules go first and `train`
# last: importlib.reload does not rebind names that other modules copied with
# `from ... import ...`, so reloading `train` after its (presumed) helpers
# lets it pick up their fresh definitions.
# NOTE(review): assumes `train` depends on the tools.* modules and not the
# other way round -- confirm against train.py.
for _module in (dr, dt, preprocessing, train):
    importlib.reload(_module)


# Metrics requested from train.Train_WESAD.get_wesad_data; the same list is
# used further down to decide which columns get 0-1 scaled.
_M = train.Metrics
metrics = [
    _M.BPM, _M.RMSSD, _M.HF_RR, _M.LF_RR, _M.IBI, _M.SDNN,
    _M.MEAN_SCL, _M.SCR_RATE,
    _M.RESP,
    _M.MEAN_WRIST_ACT, _M.PEAK_WRIST_ACC,
]

# Phase subsets to evaluate, one experiment per inner list, going from all
# five phases down to the baseline phase alone.
_P = dr.Phases
model_phases = [
    [_P.BASE, _P.FUN, _P.TSST, _P.MEDI_1, _P.MEDI_2],
    [_P.BASE, _P.TSST, _P.MEDI_1, _P.MEDI_2],
    [_P.BASE, _P.MEDI_1, _P.MEDI_2],
    [_P.BASE],
]

# Classifiers under comparison, keyed by the short name that appears in the
# printed results.
models = dict(
    SVM=SVC(C=10, gamma=1),
    KNN=KNeighborsClassifier(n_neighbors=7),
    DT=DecisionTreeClassifier(),
    LogReg=LogisticRegression(max_iter=1000),
    Bayes=GaussianNB(),
    XGB=XGBClassifier(),
)

# Label source passed to get_wesad_data.
label_type = "stai"

def _min_max_scale(frame, columns):
    """Rescale each of `columns` in `frame` to the range [0, 1], in place.

    A column whose values are all identical has zero range; it is set to 0.0
    rather than divided by zero, which would fill the column with NaN.

    Returns the same `frame` object for convenience.
    """
    for col in columns:
        low = frame[col].min()
        span = frame[col].max() - low
        frame[col] = (frame[col] - low) / span if span != 0 else 0.0
    return frame


# Detailed metrics are printed only for models whose mean accuracy exceeds this.
report_threshold = 0.65
# Number of train/predict rounds averaged per phase subset.
num_iters = 10
# Keys read from each per-round report returned by train.train_predict.
report_keys = ("precision", "recall", "f1", "auc")

# The loop variable is deliberately NOT called `phases`, so the module-level
# `phases` (the full phase order) is not overwritten by this cell.
for phase_set in model_phases:
    print(f"PHASES: {phase_set} " + "-"*30)
    x, y = train.Train_WESAD.get_wesad_data(metrics, phase_set, verbose=False, label_type=label_type)

    # 0-1 scaling of the metric columns. The first two columns are skipped,
    # as are any columns that are not one of the requested metrics.
    # NOTE(review): the min/max come from the full dataset before
    # train_predict runs; if train_predict splits into train/test internally,
    # this leaks test-set statistics into training -- confirm.
    _min_max_scale(x, [col for col in x.columns[2:] if col in metrics])

    # One list of per-round results per model, keyed like `models`.
    acc_results = {model_name: [] for model_name in models}
    reports = {model_name: [] for model_name in models}

    for _ in range(num_iters):
        out = train.train_predict(models, x, y, by_subject=False, save_metrics=True)
        for model_name in models:
            acc_results[model_name].append(out[model_name][0])
            reports[model_name].append(out[model_name][1])

    for model_name, accuracies in acc_results.items():
        acc = np.mean(accuracies)
        print(f"{model_name} accuracy over {num_iters} rounds: {acc}")
        if acc > report_threshold:
            print(f"Model evaluation metrics for {model_name}:")
            # Average every reported metric over the rounds.
            means = {
                key: np.mean([report[key] for report in reports[model_name]])
                for key in report_keys
            }
            p, r, f1, auc = (means[key] for key in report_keys)
            print(f"Precision: {p}\nRecall: {r}\nF1-score: {f1}\nAUC score: {auc}")
    print("\n")

PHASES: ['Base', 'Fun', 'TSST', 'Medi_1', 'Medi_2'] ------------------------------
SVM accuracy over 10 rounds: 0.4699999999999999
KNN accuracy over 10 rounds: 0.45
DT accuracy over 10 rounds: 0.5800000000000001
LogReg accuracy over 10 rounds: 0.4999999999999999
Bayes accuracy over 10 rounds: 0.56
XGB accuracy over 10 rounds: 0.65


PHASES: ['Base', 'TSST', 'Medi_1', 'Medi_2'] ------------------------------
SVM accuracy over 10 rounds: 0.55
KNN accuracy over 10 rounds: 0.4375
DT accuracy over 10 rounds: 0.6625
Model evaluation metrics for DT:
Precision: 0.755
Recall: 0.5328571428571428
F1-score: 0.6053463203463203
AUC score: 0.6664285714285715
LogReg accuracy over 10 rounds: 0.6
Bayes accuracy over 10 rounds: 0.6375
XGB accuracy over 10 rounds: 0.7
Model evaluation metrics for XGB:
Precision: 0.7683333333333333
Recall: 0.5945238095238095
F1-score: 0.6433333333333333
AUC score: 0.6889285714285714


PHASES: ['Base', 'Medi_1', 'Medi_2'] ------------------------------
Only one label in tes