In [2]:
# IMPORTING MODULES
# Imports for the whole notebook plus one-time session configuration:
# module search path, cvxopt solver verbosity, and warning filters.
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
# Absolute path to the cvxEDA sources, resolved relative to the current working directory.
# NOTE(review): cvx_path is computed but never added to sys.path in this cell — confirm it is still needed.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
# Absolute path to the `src` directory two levels above the current working directory.
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import sys
# Extend the module search path with `src`; presumably this is what makes the
# `tools.*` and `train` imports below resolvable — verify against the repo layout.
sys.path.append(module_path)

# Project-local modules (reloaded at the top of later cells via importlib.reload).
import tools.data_reader_apd as dr_a
import tools.data_reader_case as dr_c
import tools.data_reader_wesad as dr_w
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
# Turn off cvxopt's per-iteration progress printing.
cvxopt.solvers.options['show_progress'] = False

import warnings
# Suppress every RuntimeWarning for the rest of the session.
# NOTE(review): this is a blanket filter; it also hides numerical warnings
# (e.g. divide-by-zero, invalid value) raised by any library used below.
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Re-import the project modules so on-disk edits take effect without a kernel
# restart. The reload order is the same as before.
# NOTE(review): `train` is reloaded before the `tools.*` modules. If `train`
# binds names from those modules at import time, it may keep stale objects
# (importlib.reload does not rebind names held elsewhere) — confirm the order.
for _module in (dt, train, preprocessing, dr_a, dr_c, dr_w):
    importlib.reload(_module)

# --- Label selection, one setting per dataset --------------------------------
# Passed to the APD loader; when it is not None the loader output is expected
# to carry an "anxietyGroup" column that a later step drops.
anxiety_label_type = None
case_label_type = dr_c.SelfReports.AROUSAL
wesad_label_type = "stai"

# --- Experiment settings -----------------------------------------------------
# `threshold` is forwarded to the dataset loaders. `test_size` and
# `percent_of_target_dataset` are not referenced by the cells visible here.
threshold = "fixed"
test_size = 1.0
percent_of_target_dataset = 0.0

# Features requested from every loader: the hand-picked metrics listed below
# followed by whatever `train.Metrics.STATISTICAL` contributes.
metrics = [
    train.Metrics.BPM,
    train.Metrics.RMSSD,
    train.Metrics.HF_RR,
    train.Metrics.LF_RR,
    train.Metrics.SDNN,
    train.Metrics.MEAN_SCL,
    train.Metrics.SCR_RATE,
] + train.Metrics.STATISTICAL

# --- Phases used for modelling -----------------------------------------------
model_phases_apd = [
    "Baseline_Rest",
    "BugBox_Relax",
    "BugBox_Anticipate",
    "BugBox_Exposure",
    "BugBox_Break",
    "Speech_Relax",
    "Speech_Anticipate",
    "Speech_Exposure",
    "Speech_Break",
]
model_phases_wesad = dr_w.Phases.PHASE_ORDER

def _subjects_with_implausible_bpm(features, min_bpm=35, max_bpm=200):
    """Return the subjects that have at least one out-of-range heart-rate value.

    Parameters
    ----------
    features : pandas.DataFrame
        Must contain a "bpm" column and a "subject" column.
    min_bpm, max_bpm : number
        Rows with ``bpm < min_bpm`` or ``bpm > max_bpm`` mark their subject as
        invalid. NaN values compare False on both sides and are therefore
        never flagged.

    Returns
    -------
    set
        Subject identifiers with at least one flagged row.
    """
    bpm = features["bpm"]
    out_of_range = (bpm > max_bpm) | (bpm < min_bpm)
    # Select with the boolean mask through .loc. The previous code took the
    # *labels* of the matching rows and fed them to positional .iloc, which
    # is only correct when the frame has a default 0..n-1 index.
    return set(features.loc[out_of_range, "subject"].tolist())


# The invalid-subject sets are built from un-normalized data so the BPM
# thresholds apply to raw beats per minute.
# NOTE(review): nothing in the cells visible here consumes these sets; the
# filtering that would use them is commented out in the loading step.
apd_bpm, _ = train.Train_APD.get_apd_data_ranking([train.Metrics.BPM], phases=dr_a.Phases.PHASES_LIST, normalize=False)
invalid_apd_subjects = _subjects_with_implausible_bpm(apd_bpm)

# NOTE(review): CASE is loaded with the full `metrics` list although only
# "bpm" is inspected here; kept as-is because the loader's row selection may
# depend on the requested metrics — confirm before narrowing it.
case_features_raw, _ = train.Train_CASE.get_case_data(metrics, verbose=False, label_type=case_label_type, threshold=threshold, normalize=False)
invalid_case_subjects = _subjects_with_implausible_bpm(case_features_raw)

wesad_bpm, _ = train.Train_WESAD.get_wesad_data([train.Metrics.BPM], phases=dr_w.Phases.PHASE_ORDER, normalize=False)
invalid_wesad_subjects = _subjects_with_implausible_bpm(wesad_bpm)

# Offsets added to subject IDs so the three datasets do not share IDs.
# NOTE(review): assumes APD IDs stay below 500 and offset CASE IDs below 1000
# — confirm against the datasets.
CASE_SUBJECT_OFFSET = 500
WESAD_SUBJECT_OFFSET = 1000


def _drop_incomplete_rows(features, labels):
    """Remove every row whose feature vector contains a missing value.

    The same rows are removed, by position, from both frames, and both are
    returned with a fresh 0..n-1 index.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table; a row is dropped when any of its cells is null.
    labels : pandas.DataFrame
        Label table with one row per row of ``features``, in the same order.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The filtered ``features`` and ``labels``.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(features) != len(labels):
        raise ValueError(
            f"features and labels are misaligned: {len(features)} vs {len(labels)} rows"
        )
    # A positional boolean mask avoids the earlier mix-up, where positional
    # offsets from nonzero() were passed to drop(labels=...) and interpreted
    # as index labels.
    complete = ~features.isnull().any(axis=1).to_numpy()
    return (
        features.loc[complete].reset_index(drop=True),
        labels.loc[complete].reset_index(drop=True),
    )


x_a, y_a = train.Train_APD.get_apd_data_ranking(metrics, model_phases_apd, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold, normalize=True, standardize=False)
x_c, y_c = train.Train_CASE.get_case_data(metrics, verbose=False, label_type=case_label_type, threshold=threshold, normalize=True, standardize=False)
x_w, y_w = train.Train_WESAD.get_wesad_data(metrics, model_phases_wesad, verbose=False, label_type=wesad_label_type, threshold=threshold, normalize=True, standardize=False)

# Dropping subjects with noisy data is intentionally disabled; re-enable by
# uncommenting the two lines below.
# x_a = x_a[~x_a["subject"].isin(invalid_apd_subjects)].reset_index(drop=True)
# y_a = y_a[~y_a["subject"].isin(invalid_apd_subjects)].reset_index(drop=True)

if anxiety_label_type is not None:
    x_a = x_a.drop(["anxietyGroup"], axis=1)  # drop anxietyGroup column because WESAD doesn't have this feature

# phaseId is not used as a model feature.
x_a = x_a.drop(["phaseId"], axis=1)
x_c = x_c.drop(["phaseId"], axis=1)
x_w = x_w.drop(["phaseId"], axis=1)

x_a, y_a = _drop_incomplete_rows(x_a, y_a)
x_c, y_c = _drop_incomplete_rows(x_c, y_c)
x_w, y_w = _drop_incomplete_rows(x_w, y_w)

# make sure subjects from different datasets aren't labeled with the same index
x_c["subject"] = x_c["subject"] + CASE_SUBJECT_OFFSET
y_c["subject"] = y_c["subject"] + CASE_SUBJECT_OFFSET
x_w["subject"] = x_w["subject"] + WESAD_SUBJECT_OFFSET
y_w["subject"] = y_w["subject"] + WESAD_SUBJECT_OFFSET

In [4]:
# Pick up on-disk edits to the project modules (same order as the other cells).
for _module in (dt, train, preprocessing, dr_a, dr_c, dr_w):
    importlib.reload(_module)

import statsmodels.api as sm
from statsmodels.regression.mixed_linear_model import MixedLM


def _fit_mixed_lm(features, labels):
    """Fit a linear mixed model of the label on the features, grouped by subject.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table with a "subject" column; every other column is used as
        a fixed-effect regressor.
    labels : pandas.DataFrame
        Table with a "label" column, row-aligned with ``features``.

    Returns
    -------
    The fitted results object returned by ``MixedLM.fit()``.

    Notes
    -----
    No ``exog_re`` is passed, so statsmodels uses its default random-effects
    structure (a random intercept per group).
    NOTE(review): no constant column is added to the fixed effects, so the
    model has no fixed intercept, while ``sm`` is imported but unused here —
    confirm whether ``sm.add_constant`` was intended.
    """
    endog = labels.loc[:, "label"]
    exog = features.loc[:, features.columns != "subject"]
    groups = features.loc[:, "subject"]
    return MixedLM(endog, exog, groups).fit()


# One fit per dataset. The model is fitted first and the header printed
# afterwards, so any optimizer warnings appear above that dataset's section.
for dataset_name, features, labels in (
    ("APD", x_a, y_a),
    ("CASE", x_c, y_c),
    ("WESAD", x_w, y_w),
):
    result = _fit_mixed_lm(features, labels)

    print(dataset_name + " " + "-"*49 + "\n")
    print(result.summary())
    print(result.pvalues)
    print("\n")

APD -------------------------------------------------

         Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: label    
No. Observations: 1034    Method:             REML     
No. Groups:       52      Scale:              0.1624   
Min. group size:  18      Log-Likelihood:     -592.7035
Max. group size:  29      Converged:          Yes      
Mean group size:  19.9                                 
-------------------------------------------------------
            Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
-------------------------------------------------------
bpm          1.010    0.141  7.138 0.000   0.733  1.287
rmssd       -0.218    0.387 -0.565 0.572  -0.977  0.540
hf_rr       -0.121    0.406 -0.297 0.766  -0.916  0.675
lf_rr        0.180    0.259  0.694 0.488  -0.328  0.688
sdnn         0.256    0.397  0.644 0.519  -0.522  1.034
mean_SCL     1.635    7.070  0.231 0.817 -12.222 15.492
SCR_rate    -0.408    0.100 -4.079 0.000  -0.604 -0.212
ec

Maximum Likelihood optimization failed to converge. Check mle_retvals
Retrying MixedLM optimization with lbfgs


CASE -------------------------------------------------

         Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: label    
No. Observations: 480     Method:             REML     
No. Groups:       30      Scale:              0.1291   
Min. group size:  16      Log-Likelihood:     -208.4504
Max. group size:  16      Converged:          Yes      
Mean group size:  16.0                                 
-------------------------------------------------------
            Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
-------------------------------------------------------
bpm          0.584    0.161  3.630 0.000   0.268  0.899
rmssd        0.135    0.233  0.578 0.563  -0.322  0.591
hf_rr       -0.384    0.497 -0.772 0.440  -1.359  0.591
lf_rr        0.275    0.437  0.630 0.529  -0.581  1.132
sdnn         0.128    0.214  0.598 0.550  -0.291  0.547
mean_SCL     0.588    0.210  2.800 0.005   0.176  0.999
SCR_rate    -0.134    0.116 -1.161 0.246  -0.361  0.092
e

In [6]:
# Pick up on-disk edits to the project modules (same order as the other cells).
for _module in (dt, train, preprocessing, dr_a, dr_c, dr_w):
    importlib.reload(_module)

import statsmodels.api as sm
from sklearn.linear_model import LinearRegression


def _fit_and_report_linear_regression(dataset_name, features, labels):
    """Fit ordinary least squares on one dataset and print a short report.

    Prints, in order: a header line, the feature names seen during fit, the
    fitted coefficients, and the score on the same data used for fitting
    (an in-sample figure, not a held-out estimate).

    Parameters
    ----------
    dataset_name : str
        Name shown in the header line.
    features : pandas.DataFrame
        Feature table; the "subject" column is excluded from the regressors.
    labels : pandas.DataFrame
        Table with a "label" column, row-aligned with ``features``.

    Returns
    -------
    sklearn.linear_model.LinearRegression
        The fitted estimator.
    """
    endog = labels.loc[:, "label"]
    exog = features.loc[:, features.columns != "subject"]
    model = LinearRegression().fit(exog, endog)

    print(dataset_name + " " + "-"*49 + "\n")
    print(model.feature_names_in_)
    print(model.coef_)
    print(model.score(exog, endog))
    print("\n")
    return model


# Subject identity is ignored here: every row is treated as an independent
# observation, unlike the grouped mixed-model fits.
for dataset_name, features, labels in (
    ("APD", x_a, y_a),
    ("CASE", x_c, y_c),
    ("WESAD", x_w, y_w),
):
    model = _fit_and_report_linear_regression(dataset_name, features, labels)

APD -------------------------------------------------

['bpm' 'rmssd' 'hf_rr' 'lf_rr' 'sdnn' 'mean_SCL' 'SCR_rate' 'ecg_mean'
 'ecg_median' 'ecg_std' 'ecg_var' 'eda_mean' 'eda_median' 'eda_std'
 'eda_var' 'lf_hf_ratio']
[ 0.49618766 -0.71085269  0.24153432  0.45375601  0.6083763  -1.35187015
 -0.17714132 -0.84289901  0.66908912  0.01562406 -0.37301554  1.27334681
 -0.12485993  0.58652536 -0.49347345 -0.69701416]
0.0758898606567785


CASE -------------------------------------------------

['bpm' 'rmssd' 'hf_rr' 'lf_rr' 'sdnn' 'mean_SCL' 'SCR_rate' 'ecg_mean'
 'ecg_median' 'ecg_std' 'ecg_var' 'eda_mean' 'eda_median' 'eda_std'
 'eda_var' 'lf_hf_ratio']
[ 0.8243417  -0.02104352 -0.02244729 -0.01703054  0.32188635  0.74135473
 -0.13090411 -1.03670289  0.86238123 -0.80786686  0.02375202  2.24626625
 -3.50872453  0.44992222 -0.02033878  0.02702177]
0.3229580324882859


WESAD -------------------------------------------------

['bpm' 'rmssd' 'hf_rr' 'lf_rr' 'sdnn' 'mean_SCL' 'SCR_rate' 'ecg_mea