In [4]:
# IMPORTING MODULES
# This cell pulls in third-party libraries, extends sys.path so the
# project-local modules can be imported, and configures solver/warning output.
# NOTE: statement order matters here -- `sys.path.append(module_path)` has to
# run before the `tools.*` / `train` imports further down.
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
# Absolute paths resolved relative to the current working directory.
# NOTE(review): `cvx_path` is computed but never added to sys.path or otherwise
# referenced in the visible cells -- confirm whether it is still needed.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import shap
import sys
# Presumably this is what lets the `tools` package and `train` module below
# resolve -- TODO confirm they live under ../../src.
sys.path.append(module_path)

# Project-local data readers (one per corpus: APD, CASE, WESAD), display
# helpers, preprocessing utilities and the training/feature-extraction module.
import tools.data_reader_apd as dr_a
import tools.data_reader_case as dr_c
import tools.data_reader_wesad as dr_w
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

# Turn off cvxopt's per-iteration solver progress printing.
import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

# Silence RuntimeWarning and FutureWarning for the whole kernel session.
# NOTE(review): this is a global filter, so genuine numerical warnings
# (e.g. divide-by-zero) raised in later cells are hidden as well.
import warnings
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [27]:
# ---------------------------------------------------------------------------
# Experiment configuration: label selection, thresholding mode, feature set
# and the phases of each corpus that are fed to the models.
# ---------------------------------------------------------------------------

# Thresholding mode forwarded to every train.* loader call below.
threshold = "fixed"

# Label selection, one setting per corpus loader.
wesad_label_type = "stai"
case_label_type = dr_c.SelfReports.AROUSAL
anxiety_label_type = None

# Feature set: the individually listed metrics first, followed by whatever
# train.Metrics.STATISTICAL contributes.
_listed_metrics = [
    train.Metrics.BPM,
    train.Metrics.RMSSD,
    train.Metrics.HF_RR,
    train.Metrics.LF_RR,
    train.Metrics.SDNN,
    train.Metrics.MEAN_SCL,
    train.Metrics.SCR_RATE,
]
metrics = _listed_metrics + train.Metrics.STATISTICAL

# Phases used for modelling.
model_phases_wesad = dr_w.Phases.PHASE_ORDER
model_phases_apd = [
    "Baseline_Rest",
    "BugBox_Relax",
    "BugBox_Anticipate",
    "BugBox_Exposure",
    "BugBox_Break",
    "Speech_Relax",
    "Speech_Anticipate",
    "Speech_Exposure",
    "Speech_Break",
]

# NOTE(review): not referenced by any of the visible cells -- confirm usage.
percent_of_target_dataset = 0.0

# Heart-rate bounds: a subject is flagged as invalid as soon as one of their
# rows has a "bpm" value strictly above the upper or strictly below the lower
# limit.
BPM_UPPER_LIMIT = 200
BPM_LOWER_LIMIT = 35


def _subjects_with_out_of_range_bpm(features, lower=BPM_LOWER_LIMIT, upper=BPM_UPPER_LIMIT):
    """Collect the subjects that have at least one out-of-range bpm row.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame with a "bpm" column and a "subject" column.
    lower, upper : number
        Rows with ``bpm < lower`` or ``bpm > upper`` are flagged.

    Returns
    -------
    set
        The distinct values of the "subject" column over the flagged rows.
    """
    bpm = features["bpm"]
    out_of_range = (bpm > upper) | (bpm < lower)
    # A boolean mask through .loc selects the flagged rows directly. The
    # previous code fed index *labels* to the positional `.iloc`, which is
    # only correct when the frame carries a default 0..n-1 index.
    return set(features.loc[out_of_range, "subject"].tolist())


# APD: heart rate only, over every phase, on un-normalized values.
temp_a, _ = train.Train_APD.get_apd_data_ranking([train.Metrics.BPM], phases=dr_a.Phases.PHASES_LIST, normalize=False)
invalid_apd_subjects = _subjects_with_out_of_range_bpm(temp_a)

# CASE: loaded with the full metric list; only "bpm" and "subject" are read.
temp_a, _ = train.Train_CASE.get_case_data(metrics, verbose=False, label_type=case_label_type, threshold=threshold, normalize=False)
invalid_case_subjects = _subjects_with_out_of_range_bpm(temp_a)

# WESAD: heart rate only, over every phase, on un-normalized values.
temp_a, _ = train.Train_WESAD.get_wesad_data([train.Metrics.BPM], phases=dr_w.Phases.PHASE_ORDER, normalize=False)
invalid_wesad_subjects = _subjects_with_out_of_range_bpm(temp_a)

# NOTE(review): the invalid_*_subjects sets are not consumed by any of the
# visible cells -- confirm they are applied before modelling.


def _drop_incomplete_rows(features, labels):
    """Remove every row whose features contain a missing value.

    The same rows are removed from ``labels`` so both stay aligned, and both
    results are re-indexed from 0.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature matrix, one row per sample.
    labels : pandas.DataFrame or pandas.Series
        Targets with the same number of rows as ``features``.

    Returns
    -------
    tuple
        ``(features, labels)`` restricted to the complete rows.

    Raises
    ------
    ValueError
        If ``features`` and ``labels`` do not have the same number of rows.
    """
    if len(features) != len(labels):
        raise ValueError(
            f"features and labels differ in length: {len(features)} vs {len(labels)}"
        )
    # Positional boolean mask (plain ndarray), so the selection is independent
    # of the index labels. The previous code passed row *positions* to the
    # label-based `drop`, which is only correct for a default 0..n-1 index.
    complete = features.notna().all(axis=1).to_numpy()
    return (
        features.loc[complete].reset_index(drop=True),
        labels.loc[complete].reset_index(drop=True),
    )


# Load the modelling features and labels for each corpus.
x_a, y_a = train.Train_APD.get_apd_data_ranking(metrics, model_phases_apd, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold, normalize=True, standardize=False)
x_c, y_c = train.Train_CASE.get_case_data(metrics, verbose=False, label_type=case_label_type, threshold=threshold, normalize=True, standardize=False)
x_w, y_w = train.Train_WESAD.get_wesad_data(metrics, model_phases_wesad, verbose=False, label_type=wesad_label_type, threshold=threshold, normalize=True, standardize=False)

# Drop the "phaseId" column from the feature frames, then discard samples
# with missing feature values.
x_a, y_a = _drop_incomplete_rows(x_a.drop(columns=["phaseId"]), y_a)
x_c, y_c = _drop_incomplete_rows(x_c.drop(columns=["phaseId"]), y_c)
x_w, y_w = _drop_incomplete_rows(x_w.drop(columns=["phaseId"]), y_w)

In [28]:
# REGRESSION
# Within-corpus linear regression: for each corpus the model is fitted on that
# corpus and R^2 is computed on the very same rows (in-sample fit quality).

# Re-import the project modules so on-disk edits are picked up.
for _reloaded_module in (train, dr_a, dr_c, dr_w, dt):
    importlib.reload(_reloaded_module)

# NOTE(review): nothing in this cell draws from `random`; the seed is kept so
# the global random state is unchanged for subsequent cells.
random.seed(15)

# One estimator is reused; every fit() call refits it from scratch and returns
# the estimator itself, which allows chaining score().
model = LinearRegression()

r2_apd = model.fit(x_a, y_a).score(x_a, y_a)
print(f"R^2 value for APD: {r2_apd}")

r2_case = model.fit(x_c, y_c).score(x_c, y_c)
print(f"R^2 value for CASE: {r2_case}")

r2_wesad = model.fit(x_w, y_w).score(x_w, y_w)
print(f"R^2 value for WESAD: {r2_wesad}")

R^2 value for APD: 0.5783447536891101
R^2 value for CASE: 0.656024860790551
R^2 value for WESAD: 0.8467110448638273


## Cross-corpus

In [16]:
# REGRESSION
# Pairwise pooled regression: two corpora are concatenated, a fresh linear
# model is fitted on the pooled rows and R^2 is computed on those same rows.
# NOTE(review): these are in-sample scores of a pooled fit. An earlier
# train-on-one-corpus / score-on-the-other variant was commented out in this
# cell -- confirm which evaluation the "Cross-corpus" heading refers to.

# Re-import the project modules so on-disk edits are picked up.
for _reloaded_module in (train, dr_a, dr_w, dt):
    importlib.reload(_reloaded_module)

from scipy import stats

random.seed(81)

# --- APD + CASE -------------------------------------------------------------
model = LinearRegression()
x_train, y_train = pd.concat([x_a, x_c]), pd.concat([y_a, y_c])
r2_apd_case = model.fit(x_train, y_train).score(x_train, y_train)
print(f"R^2 value for APD/CASE: {r2_apd_case}")

# --- APD + WESAD ------------------------------------------------------------
model = LinearRegression()
x_train, y_train = pd.concat([x_a, x_w]), pd.concat([y_a, y_w])
r2_apd_wesad = model.fit(x_train, y_train).score(x_train, y_train)
print(f"R^2 value for APD/WESAD: {r2_apd_wesad}")

# --- CASE + WESAD -----------------------------------------------------------
model = LinearRegression()
x_train, y_train = pd.concat([x_c, x_w]), pd.concat([y_c, y_w])
r2_case_wesad = model.fit(x_train, y_train).score(x_train, y_train)
print(f"R^2 value for CASE/WESAD: {r2_case_wesad}")


R^2 value for APD/CASE: 0.5372716938219968
R^2 value for APD/WESAD: 0.5787879384916446
R^2 value for CASE/WESAD: 0.5918678805474745


## Leave-one-corpus-out

In [15]:
# REGRESSION
# Leave-one-corpus-out: fit a fresh linear model on two corpora pooled
# together and report R^2 on the held-out third corpus.

# Re-import the project modules so on-disk edits are picked up.
for _reloaded_module in (train, dr_a, dr_w, dt):
    importlib.reload(_reloaded_module)

from scipy import stats

random.seed(81)

# --- held out: APD (train on CASE + WESAD) ----------------------------------
model = LinearRegression()
x_train, y_train = pd.concat([x_c, x_w]), pd.concat([y_c, y_w])
r2_loo_apd = model.fit(x_train, y_train).score(x_a, y_a)
print(f"R^2 value for LOO APD: {r2_loo_apd}")

# --- held out: CASE (train on APD + WESAD) ----------------------------------
model = LinearRegression()
x_train, y_train = pd.concat([x_a, x_w]), pd.concat([y_a, y_w])
r2_loo_case = model.fit(x_train, y_train).score(x_c, y_c)
print(f"R^2 value for LOO CASE: {r2_loo_case}")

# --- held out: WESAD (train on APD + CASE) ----------------------------------
model = LinearRegression()
x_train, y_train = pd.concat([x_a, x_c]), pd.concat([y_a, y_c])
r2_loo_wesad = model.fit(x_train, y_train).score(x_w, y_w)
print(f"R^2 value for LOO WESAD: {r2_loo_wesad}")

R^2 value for LOO APD: -0.00644460153303239
R^2 value for LOO CASE: 0.4551259165843814
R^2 value for LOO WESAD: 0.3923797186407745
