### Can we classify each phase as relatively low or high anxiety for each subject? ###

In [8]:
# IMPORTING MODULES
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import shap
import sys
sys.path.append(module_path)

import tools.data_reader_wesad as dr
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

import warnings

# Suppress every RuntimeWarning raised from here on (process-wide filter).
warnings.filterwarnings(action="ignore", category=RuntimeWarning)


# Phase ordering exposed by the WESAD data reader module.
phases = dr.Phases.PHASE_ORDER


In [9]:
# Candidate classifiers keyed by short display name. The key order is kept
# because later cells iterate over the same names in this order.
models = dict(
    KNN=KNeighborsClassifier(n_neighbors=5),
    DT=DecisionTreeClassifier(criterion="gini", max_features="sqrt"),
    LogReg=LogisticRegression(max_iter=1000),
    # Optional extra setting, left disabled: min_samples_split=3
    RF=RandomForestClassifier(n_estimators=100, max_features="sqrt"),
    XGB=XGBClassifier(
        use_label_encoder=False,
        objective="binary:logistic",
        eval_metric="error",
    ),
)

In [10]:
# Re-import the project modules so edits made on disk are picked up without
# restarting the kernel. Order matches the original: train, dr, dt, preprocessing.
for _module in (train, dr, dt, preprocessing):
    importlib.reload(_module)


# Labelling options forwarded to the WESAD loader.
label_type, threshold = "stai", "fixed"

# Phases requested from the loader: the reader's full phase ordering.
model_phases = dr.Phases.PHASE_ORDER

# Metrics requested from the loader, in the order they are passed through.
metrics = [
    train.Metrics.BPM,
    train.Metrics.RMSSD,
    train.Metrics.HF_RR,
    train.Metrics.LF_RR,
    # train.Metrics.IBI is currently disabled
    train.Metrics.SDNN,
    train.Metrics.MEAN_SCL,
    train.Metrics.SCR_RATE,
]

# x holds the feature table and y the labels returned by the loader.
x, y = train.Train_WESAD.get_wesad_data(
    metrics,
    model_phases,
    verbose=False,
    label_type=label_type,
    threshold=threshold,
    normalize=True,
    combine_phases=False,
)
# Remove the phaseId column from the feature table before modelling.
x = x.drop(columns=["phaseId"])

# Flag every row that contains at least one missing value.
has_nan = x.isnull().any(axis=1).to_numpy()
# Positional indices of the removed rows; kept under the original name in case
# a later cell inspects them.
inds = has_nan.nonzero()[0]

# Filter with the boolean mask rather than drop(labels=inds): drop() matches
# index *labels*, while `inds` holds row *positions*, and the two only coincide
# when the frame carries a default RangeIndex. The same mask is applied to the
# labels so features and labels stay row-aligned (assumes x and y have the same
# number of rows in the same order -- the original code relied on this too).
x = x.loc[~has_nan].reset_index(drop=True)
y = y.loc[~has_nan].reset_index(drop=True)

# Short names of the classifiers whose results are collected, in display order.
_model_names = ("KNN", "DT", "LogReg", "RF", "XGB")

# Per-model accumulators filled by the training loop: one entry per fold.
# Each name gets its own fresh list in each dictionary.
acc_results = {name: [] for name in _model_names}
reports = {name: [] for name in _model_names}

# Number of times the whole train/predict run is repeated.
num_iters = 1
# Whether feature-importance output is requested and printed.
get_importance = True
# Run the train/predict routine num_iters times and collect per-fold results.
# Each out[model_name][i] is read as: [0] accuracy, [1] report, [2] importance.
for _ in range(num_iters):
    out = train.train_predict(
        models,
        x,
        y,
        by_subject=True,
        save_metrics=True,
        is_resample=False,
        get_importance=get_importance,
        drop_subject=True,
        test_size=0.2,
        folds=5,
    )
    for model_name in acc_results:
        fold_results = out[model_name]
        n_folds = len(fold_results)

        for i in range(n_folds):
            acc_results[model_name].append(fold_results[i][0])
            reports[model_name].append(fold_results[i][1])

        if get_importance:
            try:
                print("")
                # Names paired with the importance scores: the requested
                # metrics plus an extra "lf_hf_ratio" entry (presumably a
                # feature derived by the loader -- confirm against train.py).
                feature_names = metrics + ["lf_hf_ratio"]
                for i in range(n_folds):
                    feature_imp = sorted(
                        zip(feature_names, fold_results[i][2]),
                        key=lambda pair: pair[1],
                        reverse=True,
                    )
                    print(feature_imp)
            except Exception:
                # Best-effort fallback: if the scores cannot be paired or
                # sorted, show the raw importance object of the first fold.
                print(fold_results[0][2])
            print("")

# Print the collected evaluation metrics, one section per model and one
# dashed-line-terminated entry per fold.
for model_name in acc_results:
    print(f"Model evaluation metrics for {model_name}:")
    # Accuracies and reports were appended in lockstep, so they pair up by fold.
    for acc, report in zip(acc_results[model_name], reports[model_name]):
        p, r = report["precision"], report["recall"]
        f1, auc = report["f1"], report["auc"]
        print(f"\tAccuracy: {acc}\n\tPrecision: {p}\n\tRecall: {r}\n\tF1-score: {f1}\n\tAUC score: {auc}\n" + "-"*40)
    print("")
print("\n")

y_train | y_test:
{0: 617, 1: 150} | {0: 162, 1: 30}
y_train | y_test:
{0: 617, 1: 150} | {0: 162, 1: 30}
y_train | y_test:
{0: 617, 1: 150} | {0: 162, 1: 30}
y_train | y_test:
{0: 632, 1: 135} | {0: 147, 1: 45}
y_train | y_test:
{0: 633, 1: 135} | {0: 146, 1: 45}
Fold #0
Model KNN, Predictions: [0 1], [169  23]
Feature importance not available for KNN
Fold #1
Model KNN, Predictions: [0 1], [184   8]
Feature importance not available for KNN
Fold #2
Model KNN, Predictions: [0 1], [160  32]
Feature importance not available for KNN
Fold #3
Model KNN, Predictions: [0 1], [161  31]
Feature importance not available for KNN
Fold #4
Model KNN, Predictions: [0 1], [161  30]
Feature importance not available for KNN
Fold #0
Model DT, Predictions: [0 1], [153  39]
Fold #1
Model DT, Predictions: [0 1], [172  20]
Fold #2
Model DT, Predictions: [0 1], [123  69]
Fold #3
Model DT, Predictions: [0 1], [145  47]
Fold #4
Model DT, Predictions: [0 1], [151  40]
Fold #0
Model LogReg, Predictions: [0 1], [17