### Can we classify each phase as relatively low or high anxiety for each subject? ###

In [2]:
# IMPORTING MODULES
# Standard library. `sys` must be imported before the sys.path manipulation
# below; otherwise this cell raises NameError on a fresh kernel.
import glob
import importlib
import os
import random
import sys
import warnings

# Make the project's `src` directory importable before any project-local import.
# NOTE(review): cvx_path is computed but never added to sys.path in this cell —
# confirm whether the cvxEDA sources are expected to be importable from here.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
if module_path not in sys.path:  # keep re-runs of this cell from piling up duplicates
    sys.path.append(module_path)

# Third-party
import cvxopt.solvers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# NOTE(review): the alias `ssAAZ` is unusual and is not referenced in the cells
# visible here — confirm it is intentional before renaming it.
import scipy.signal as ssAAZ

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.preprocessing import normalize

# Project-local modules (resolved through module_path appended above)
import tools.data_reader_apd as dr_a
import tools.data_reader_wesad as dr_w
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

# Silence the cvxopt solver's progress output.
cvxopt.solvers.options['show_progress'] = False

# Suppress every RuntimeWarning for the rest of the session.
warnings.filterwarnings(
    "ignore",
    category=RuntimeWarning
)

In [12]:
# Feature selection passed as the first argument to both dataset loaders in the
# training cells below.
# Alternative selections kept for reference:
# metrics = train.Metrics.ALL
# metrics = train.Metrics.ECG \
    # + train.Metrics.EDA \
    # + train.Metrics.ANKLE + train.Metrics.WRIAAST
# NOTE(review): `WRIAAST` above looks like a typo (presumably `WRIST`) — confirm
# against train.Metrics before re-enabling that alternative.

# Explicit list of individual metrics currently in use.
metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.IBI, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
]

# Candidate APD phase subsets. The training cells iterate over this list with
# enumerate() and print the index as "APD PHASES <i>", so the position of each
# sub-list is what identifies it in the output.
# NOTE(review): entries 1 and 3 are identical — confirm whether entry 3 was
# meant to be a different subset or is a leftover duplicate.
model_phases_apd = [
    # 0: rest plus relax/anticipate/exposure/break for both tasks
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Exposure", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Exposure", "Speech_Break"
    ],
    # 1: same as 0 without the two exposure phases
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Break"
    ],
    # 2: rest plus relax/anticipate only
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", 
        "Speech_Relax", "Speech_Anticipate"
    ],
    # 3: identical to entry 1 (see review note above)
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Break"
    ],
    # 4: break phases only
    ["BugBox_Break", "Speech_Break"],
    # 5: exposure phases only
    ["BugBox_Exposure", "Speech_Exposure"]
]

# Candidate WESAD phase subsets, built from the dr_w.Phases constants. The
# training cells iterate over this list with enumerate() and print the index as
# "WESAD PHASES <i>". Each entry drops phases from the previous one.
model_phases_wesad = [
    # 0: BASE, FUN, TSST and both MEDI phases
    [
        dr_w.Phases.BASE,
        dr_w.Phases.FUN,
        dr_w.Phases.TSST,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    # 1: entry 0 without FUN
    [
        dr_w.Phases.BASE,
        dr_w.Phases.TSST,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    # 2: entry 1 without TSST
    [
        dr_w.Phases.BASE,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    # 3: BASE only
    [
        dr_w.Phases.BASE,
    ]
]

# Label selectors handed to the dataset loaders in the training cells:
# apd_label_type goes to get_apd_data_ranking(anxiety_label_type=...),
# wesad_label_type goes to get_wesad_data(label_type=...).
apd_label_type, wesad_label_type = "Trait", "stai"

# Classifiers under comparison, keyed by the short name used when printing
# results. The same estimator instances are passed to every training call.
models = dict(
    SVM=SVC(C=10, gamma=1),
    KNN=KNeighborsClassifier(n_neighbors=7),
    DT=DecisionTreeClassifier(),
    LogReg=LogisticRegression(max_iter=1000),
)

In [13]:
# TRAIN ON APD AND TEST ON WESAD
# For every (APD phase subset, WESAD phase subset) pair: load both datasets,
# min-max scale each one's feature columns, run the cross-dataset training
# helper `num_iters` times and print the mean score per model.

# Reload project modules so on-disk edits are picked up without a kernel restart.
importlib.reload(train)
importlib.reload(dr_a)
importlib.reload(dr_w)
importlib.reload(dt)

NOISY_APD_SUBJECT = 84.0  # APD subject excluded because of noisy data
FIRST_FEATURE_COL = 3     # columns before this index are left unscaled (assumed to be id/label columns — TODO confirm)
num_iters = 10            # repetitions averaged per phase combination

for i, phases_apd in enumerate(model_phases_apd):
    print(f"APD PHASES {i} " + "-"*50)
    for j, phases_wesad in enumerate(model_phases_wesad):
        print(f"WESAD PHASES {j} " + "-"*50)
        # Both frames are loaded fresh for each pair because they are modified in place below.
        x_a, y_a = train.Train_APD.get_apd_data_ranking(metrics, phases_apd, verbose=False, anxiety_label_type=apd_label_type)
        x_b, y_b = train.Train_WESAD.get_wesad_data(metrics, phases_wesad, verbose=False, label_type=wesad_label_type)

        # drop subjects with noisy data
        x_a = x_a[x_a['subject'] != NOISY_APD_SUBJECT]
        x_a = x_a.drop(["anxietyGroup"], axis=1)  # drop anxietyGroup column because WESAD doesn't have this feature
        y_a = y_a[y_a['subject'] != NOISY_APD_SUBJECT]

        # 0-1 scaling, each dataset against its own min/max. Every frame is
        # indexed by ITS OWN column names (the previous code looked up x_b's
        # columns through x_a.columns, which is only correct when both frames
        # share an identical column layout).
        for frame in (x_a, x_b):
            for col in frame.columns[FIRST_FEATURE_COL:]:
                col_min = frame[col].min()
                span = frame[col].max() - col_min
                # A constant column has span 0; divide by 1 so it maps to 0 instead of NaN.
                frame[col] = (frame[col] - col_min) / (span if span != 0 else 1)

        # One list of scores per configured model, in the order of `models`.
        results = {model_name: [] for model_name in models}
        for _ in range(num_iters):
            out = train.Train_Multi_Dataset.train_across_datasets(models, x_a, y_a, x_b, y_b, by_subject=True, show_classification=False)
            for model_name in results:
                results[model_name].append(out[model_name])

        for model_name, scores in results.items():
            print(f"{model_name} accuracy over {num_iters} rounds: {np.mean(scores)}")
        print("\n")

APD PHASES 0 --------------------------------------------------
WESAD PHASES 0 --------------------------------------------------
SVM accuracy over 10 rounds: 0.4709090909090909
KNN accuracy over 10 rounds: 0.5527272727272727
DT accuracy over 10 rounds: 0.5836363636363636
LogReg accuracy over 10 rounds: 0.4163636363636363


WESAD PHASES 1 --------------------------------------------------
SVM accuracy over 10 rounds: 0.4363636363636364
KNN accuracy over 10 rounds: 0.475
DT accuracy over 10 rounds: 0.5568181818181819
LogReg accuracy over 10 rounds: 0.3954545454545455


WESAD PHASES 2 --------------------------------------------------
SVM accuracy over 10 rounds: 0.4666666666666668
KNN accuracy over 10 rounds: 0.44545454545454544
DT accuracy over 10 rounds: 0.5484848484848486
LogReg accuracy over 10 rounds: 0.4606060606060606


WESAD PHASES 3 --------------------------------------------------
SVM accuracy over 10 rounds: 0.16363636363636364
KNN accuracy over 10 rounds: 0.1818181818181818

In [14]:
# TRAIN ON WESAD AND TEST ON APD
# For every (WESAD phase subset, APD phase subset) pair: load both datasets,
# min-max scale each one's feature columns, run the cross-dataset training
# helper `num_iters` times and print the mean score per model.

# Reload project modules so on-disk edits are picked up without a kernel restart.
importlib.reload(train)
importlib.reload(dr_a)
importlib.reload(dr_w)
importlib.reload(dt)

NOISY_APD_SUBJECT = 84.0  # APD subject excluded because of noisy data
FIRST_FEATURE_COL = 3     # columns before this index are left unscaled (assumed to be id/label columns — TODO confirm)
num_iters = 10            # repetitions averaged per phase combination

for i, phases_wesad in enumerate(model_phases_wesad):
    print(f"WESAD PHASES {i} " + "-"*50)
    for j, phases_apd in enumerate(model_phases_apd):
        print(f"APD PHASES {j} " + "-"*50)
        # Here x_a/y_a hold WESAD (first dataset) and x_b/y_b hold APD (second dataset).
        # Both frames are loaded fresh for each pair because they are modified in place below.
        x_a, y_a = train.Train_WESAD.get_wesad_data(metrics, phases_wesad, verbose=False, label_type=wesad_label_type)
        x_b, y_b = train.Train_APD.get_apd_data_ranking(metrics, phases_apd, verbose=False, anxiety_label_type=apd_label_type)

        # drop subjects with noisy data
        x_b = x_b[x_b['subject'] != NOISY_APD_SUBJECT]
        x_b = x_b.drop(["anxietyGroup"], axis=1)  # drop anxietyGroup column because WESAD doesn't have this feature
        y_b = y_b[y_b['subject'] != NOISY_APD_SUBJECT]

        # 0-1 scaling, each dataset against its own min/max. Every frame is
        # indexed by ITS OWN column names (the previous code looked up x_b's
        # columns through x_a.columns, which is only correct when both frames
        # share an identical column layout).
        for frame in (x_a, x_b):
            for col in frame.columns[FIRST_FEATURE_COL:]:
                col_min = frame[col].min()
                span = frame[col].max() - col_min
                # A constant column has span 0; divide by 1 so it maps to 0 instead of NaN.
                frame[col] = (frame[col] - col_min) / (span if span != 0 else 1)

        # One list of scores per configured model, in the order of `models`.
        results = {model_name: [] for model_name in models}
        for _ in range(num_iters):
            out = train.Train_Multi_Dataset.train_across_datasets(models, x_a, y_a, x_b, y_b, by_subject=True, show_classification=False)
            for model_name in results:
                results[model_name].append(out[model_name])

        for model_name, scores in results.items():
            print(f"{model_name} accuracy over {num_iters} rounds: {np.mean(scores)}")
        print("\n")

WESAD PHASES 0 --------------------------------------------------
APD PHASES 0 --------------------------------------------------
SVM accuracy over 10 rounds: 0.6669444444444446
KNN accuracy over 10 rounds: 0.6141666666666665
DT accuracy over 10 rounds: 0.6316666666666667
LogReg accuracy over 10 rounds: 0.6452777777777778


APD PHASES 1 --------------------------------------------------
SVM accuracy over 10 rounds: 0.6621428571428571
KNN accuracy over 10 rounds: 0.6085714285714285
DT accuracy over 10 rounds: 0.6157142857142859
LogReg accuracy over 10 rounds: 0.6375


APD PHASES 2 --------------------------------------------------
SVM accuracy over 10 rounds: 0.6859999999999999
KNN accuracy over 10 rounds: 0.6544999999999999
DT accuracy over 10 rounds: 0.6184999999999998
LogReg accuracy over 10 rounds: 0.663


APD PHASES 3 --------------------------------------------------
SVM accuracy over 10 rounds: 0.6585714285714286
KNN accuracy over 10 rounds: 0.6039285714285715
DT accuracy over 10