### Can we classify each phase as relatively low or high anxiety for each subject? ###

In [1]:
# IMPORTING MODULES
# Notebook setup cell: imports, project path wiring, and output silencing.
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
# Both paths are resolved relative to the current working directory.
# NOTE(review): cvx_path is computed but never added to sys.path in this
# cell — confirm whether it is still needed.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import sys
# Must run before the `tools` / `train` imports below; presumably those
# packages live under module_path — verify against the repository layout.
sys.path.append(module_path)

import tools.data_reader_wesad as dr
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
# Turn off the cvxopt solver's progress output.
cvxopt.solvers.options['show_progress'] = False

import warnings
# Hide every RuntimeWarning for the rest of the session. Note this also hides
# numerical warnings (e.g. mean of an empty array, division by zero).
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)


# Phase ordering constant taken from the WESAD data reader module.
phases = dr.Phases.PHASE_ORDER


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Re-execute the project modules in this kernel so that source edits made
# since they were first imported are picked up. Order matches the imports.
for module in (train, dr, dt, preprocessing):
    importlib.reload(module)


# Feature identifiers passed to get_wesad_data; the scaling loop further down
# also uses this list to decide which DataFrame columns get 0-1 scaled.
metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.IBI, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE, 
]

# Disabled alternative definition of model_phases as a list of phase subsets
# (a list of lists), from the full set down to BASE only.
# model_phases = [
#     [
#         dr.Phases.BASE,
#         dr.Phases.FUN,
#         dr.Phases.TSST,
#         dr.Phases.MEDI_1,
#         dr.Phases.MEDI_2
#     ],
    # [
    #     dr.Phases.BASE,
    #     dr.Phases.TSST,
    #     dr.Phases.MEDI_1,
    #     dr.Phases.MEDI_2
    # ],
    # [
    #     dr.Phases.BASE,
    #     dr.Phases.MEDI_1,
    #     dr.Phases.MEDI_2
    # ],
    # [
    #     dr.Phases.BASE,
    # ]
# ]

# Active setting: a single flat list of phases, passed to get_wesad_data.
model_phases = [
    dr.Phases.BASE,
    dr.Phases.FUN,
    dr.Phases.TSST,
    dr.Phases.MEDI_1,
    dr.Phases.MEDI_2
]

# Classifiers to evaluate, keyed by display name. The keys are the names under
# which results are collected and printed further down. Commented-out entries
# are alternatives that are currently disabled.
models = {
    # "SVM": SVC(C=10, gamma=1),  # C=10, gamma=1
    # "KNN": KNeighborsClassifier(n_neighbors=7),
    # "DT": DecisionTreeClassifier(),
    "LogReg": LogisticRegression(max_iter=1000),
    # "Bayes": GaussianNB(),
    "XGB": XGBClassifier(use_label_encoder=False, objective="binary:logistic", eval_metric="logloss")
}

# Labeling options forwarded unchanged to get_wesad_data. Their accepted
# values and exact meaning are defined in train.Train_WESAD (not visible here).
label_type = "stai"
threshold = "fixed"

# Load the feature table and labels for the configured metrics and phases.
# To sweep several phase subsets instead, loop over a list of subsets and pass
# each one in place of model_phases.
# The code below treats x as a pandas DataFrame (it uses .drop and .columns).
x, y = train.Train_WESAD.get_wesad_data(
    metrics,
    model_phases,
    verbose=False,
    label_type=label_type,
    threshold=threshold,
)
x = x.drop(["phaseId"], axis=1)
# Debug aid: print(x.isnull().values.any()) reports whether any NaNs were loaded.

# 0-1 (min-max) scaling of every metric column.
# Columns at positions 0 and 1 are never scaled (presumably identifier
# columns — TODO confirm against get_wesad_data).
for column in x.columns[2:]:
    if column not in metrics:
        continue
    column_min = x[column].min()
    column_range = x[column].max() - column_min
    if column_range == 0:
        # Constant column: dividing by the zero range would turn every value
        # into NaN. Map the column to 0.0 instead (existing NaNs stay NaN).
        x[column] = x[column] - column_min
    else:
        x[column] = (x[column] - column_min) / column_range

# Per-model accumulators, keyed off `models` so they always cover exactly the
# classifiers being trained.
acc_results = {model_name: [] for model_name in models}
reports = {model_name: [] for model_name in models}

# Repeat the train/predict cycle and collect one accuracy and one report per
# model per successful round. out[model_name] is read as a pair:
# index 0 -> accuracy, index 1 -> dict with "precision", "recall", "f1", "auc".
num_iters = 10
for _ in range(num_iters):
    try:
        out = train.train_predict(
            models,
            x,
            y,
            by_subject=True,
            test_size=0.15,
            save_metrics=True,
            is_resample=False,
        )
        # Unpack inside the try so a malformed result skips the whole round
        # instead of leaving the accuracy and report lists out of step.
        round_results = {
            model_name: (out[model_name][0], out[model_name][1])
            for model_name in acc_results
        }
    except Exception as e:
        # Best-effort: a failed round is skipped, but the cause is reported.
        print(f"Error in resampling train/test data: {e!r}")
        continue
    for model_name, (round_acc, round_report) in round_results.items():
        acc_results[model_name].append(round_acc)
        reports[model_name].append(round_report)

# Summarize each model over the rounds that actually completed.
for model_name, scores in acc_results.items():
    if not scores:
        # Every round failed; averaging an empty list would only yield NaN.
        print(f"{model_name}: no successful rounds out of {num_iters}")
        continue
    acc = np.mean(scores)
    print(f"{model_name} accuracy over {len(scores)} rounds: {acc}")
    # NOTE(review): the recorded output shows about 80% of labels are class 0,
    # so an always-0 predictor already clears this 0.5 bar — consider a
    # threshold tied to the majority-class baseline.
    if acc > 0.5:
        print(f"Model evaluation metrics for {model_name}:")
        model_reports = reports[model_name]
        p, r, f1, auc = (
            np.mean([report[key] for report in model_reports])
            for key in ("precision", "recall", "f1", "auc")
        )
        print(f"Precision: {p}\nRecall: {r}\nF1-score: {f1}\nAUC score: {auc}")
print("\n")

y_train:
0    47
1    13
Name: label, dtype: int64
y_test:
0    8
1    2
Name: label, dtype: int64
Model LogReg, Predictions: [0], [10]
Model XGB, Predictions: [0 1], [9 1]
y_train:
0    47
1    13
Name: label, dtype: int64
y_test:
0    8
1    2
Name: label, dtype: int64
Model LogReg, Predictions: [0], [10]
Model XGB, Predictions: [0], [10]
y_train:
0    47
1    13
Name: label, dtype: int64
y_test:
0    8
1    2
Name: label, dtype: int64
Model LogReg, Predictions: [0], [10]
Model XGB, Predictions: [0], [10]
y_train:
0    47
1    13
Name: label, dtype: int64
y_test:
0    8
1    2
Name: label, dtype: int64
Model LogReg, Predictions: [0], [10]
Model XGB, Predictions: [0 1], [8 2]
y_train:
0    47
1    13
Name: label, dtype: int64
y_test:
0    8
1    2
Name: label, dtype: int64
Model LogReg, Predictions: [0], [10]
Model XGB, Predictions: [0 1], [8 2]
y_train:
0    47
1    13
Name: label, dtype: int64
y_test:
0    8
1    2
Name: label, dtype: int64
Model LogReg, Predictions: [0], [10]
Model