### Can we classify each phase as stress (TSST) vs. non-stress for each subject? ###

In [4]:
# IMPORTING MODULES
# Standard library.
import glob
import importlib
import os
import random
import sys
import warnings

# Third-party packages.
import cvxopt.solvers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal as ss
import shap
from scipy.fft import fft, fftfreq, fftshift
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Extend the module search path before importing the project modules.
# `sys` has to be imported before this point: calling sys.path.append()
# ahead of `import sys` raises NameError on a fresh kernel.
# NOTE(review): cvx_path is computed but never added to sys.path -- confirm
# whether the cvxEDA sources are meant to be importable from here.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
sys.path.append(module_path)

# Project-local modules (presumably located under module_path).
import tools.data_reader_wesad as dr
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

# Turn off cvxopt's 'show_progress' solver option.
cvxopt.solvers.options['show_progress'] = False

# Hide RuntimeWarning messages for the rest of the session.
warnings.filterwarnings(
    "ignore",
    category=RuntimeWarning
)

phases = dr.Phases.PHASE_ORDER


In [None]:
# Re-import the project modules so edits made to them on disk take effect
# without restarting the kernel.
importlib.reload(train)
importlib.reload(dt)
importlib.reload(preprocessing)


# Feature columns requested from the data loader; the same list is used to
# decide which columns get rescaled before training.
metrics = [
    train.Metrics.BPM, train.Metrics.RMSSD, train.Metrics.HF_RR,
    train.Metrics.LF_RR, train.Metrics.IBI, train.Metrics.SDNN,
    train.Metrics.MEAN_SCL, train.Metrics.SCR_RATE, train.Metrics.RESP,
    train.Metrics.MEAN_WRIST_ACT, train.Metrics.PEAK_WRIST_ACC,
]

# Candidate phase sets, evaluated one after another:
#   1. all five phases
#   2. the same set without FUN
#   3. the same set without FUN and TSST
model_phases = [
    [dr.Phases.BASE, dr.Phases.FUN, dr.Phases.TSST, dr.Phases.MEDI_1, dr.Phases.MEDI_2],
    [dr.Phases.BASE, dr.Phases.TSST, dr.Phases.MEDI_1, dr.Phases.MEDI_2],
    [dr.Phases.BASE, dr.Phases.MEDI_1, dr.Phases.MEDI_2],
    # [dr.Phases.BASE,]
]

# NOTE(review): the training loop further down in this cell assigns its own
# label_type before loading data, so this value is not the one that is used.
label_type = "stai"
# Forwarded to train.train_predict as its get_shap_values argument.
get_shap_values = True

# Classifiers to compare, keyed by the short name used when printing results.
models = dict(
    SVM=SVC(C=10, gamma=0.01),
    KNN=KNeighborsClassifier(n_neighbors=5),
    LogReg=LogisticRegression(max_iter=1000),
    Bayes=GaussianNB(),
    DT=DecisionTreeClassifier(),
)

# Train and evaluate every classifier in `models` on each candidate phase set,
# printing the mean accuracy per model and plotting any SHAP values returned.
#
# The settings below are identical for every phase set, so they are assigned
# once instead of on every iteration.
# NOTE(review): `normalize` rebinds the name imported from
# sklearn.preprocessing, and the loop variable `phases` rebinds the
# module-level `phases`. Both names are kept so that code reading them after
# this cell sees the same values as before; consider renaming them.
normalize = True
# normalize = False
label_type = "all"
num_iters = 10

for phases in model_phases:
    print(f"PHASES: {phases} " + "-"*30)
    x, y = train.Train_WESAD.get_wesad_data(metrics, phases, verbose=False, label_type=label_type, normalize=normalize)

    # 0-1 (min-max) scaling of every metric column. The first two columns are
    # skipped -- presumably identifier columns; confirm against the loader.
    # NOTE(review): the min/max are taken over the full data set before
    # train.train_predict is called with a test_size; if that call holds out
    # a test split, the scaling has already seen the test rows.
    for column in x.columns[2:]:
        if column not in metrics:
            continue
        column_min = x[column].min()
        column_range = x[column].max() - column_min
        if column_range == 0:
            # A constant column would otherwise become 0/0 = NaN in every
            # row; map it to 0.0 so it stays a valid model input.
            x[column] = 0.0
        else:
            x[column] = (x[column] - column_min) / column_range

    # One list per model, filled with one entry per training round. Keyed
    # from `models` so the result tables cannot drift from the model set.
    acc_results = {name: [] for name in models}
    reports = {name: [] for name in models}
    shap_values = {name: [] for name in models}

    for _ in range(num_iters):
        out = train.train_predict(models, x, y, by_subject=False, test_size=0.12, get_shap_values=get_shap_values)
        for model_name in models:
            # out[model_name] is indexed as (accuracy, report, SHAP values).
            result = out[model_name]
            acc_results[model_name].append(result[0])
            reports[model_name].append(result[1])
            shap_values[model_name].append(result[2])

    for model_name in models:
        acc = np.mean(acc_results[model_name])
        print(f"{model_name} accuracy over {num_iters} rounds: {acc}")
        # The detailed metrics are printed only for models whose mean
        # accuracy exceeds 0.78.
        if acc > 0.78:
            model_reports = reports[model_name]
            p = np.mean([rep["precision"] for rep in model_reports])
            r = np.mean([rep["recall"] for rep in model_reports])
            f1 = np.mean([rep["f1"] for rep in model_reports])
            auc = np.mean([rep["auc"] for rep in model_reports])
            print(f"Report:\nPrecision: {p}\nRecall: {r}\nF1-score: {f1}\nAUC score: {auc}\n")
        # One summary plot per round that produced SHAP values.
        for values in shap_values[model_name]:
            if values is not None:
                shap.summary_plot(values)