In [1]:
# IMPORTING MODULES
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import shap
import sys
sys.path.append(module_path)

import tools.data_reader_apd as dr_a
import tools.data_reader_case as dr_c
import tools.data_reader_wesad as dr_w
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

import lightgbm as lgb
from lightgbm import LGBMClassifier
from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

import warnings
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)
warnings.simplefilter(action='ignore', category=FutureWarning)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
anxiety_label_type = None
case_label_type = dr_c.SelfReports.AROUSAL
wesad_label_type = "stai"

threshold = "fixed"

metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
# ]
] + train.Metrics.STATISTICAL

model_phases_apd = [
    "Baseline_Rest", 
    "BugBox_Relax", "BugBox_Anticipate", "BugBox_Exposure", "BugBox_Break",
    "Speech_Relax", "Speech_Anticipate", "Speech_Exposure", "Speech_Break"
]
model_phases_wesad = dr_w.Phases.PHASE_ORDER

temp_a, _ = train.Train_APD.get_apd_data_ranking([train.Metrics.BPM], phases=dr_a.Phases.PHASES_LIST, normalize=False)
idx = temp_a[temp_a["bpm"] > 200].index 
invalid_apd_subjects = set(temp_a["subject"].iloc[idx].tolist())
idx = temp_a[temp_a["bpm"] < 35].index 
invalid_apd_subjects.update(set(temp_a["subject"].iloc[idx].tolist()))

temp_a, _ = train.Train_CASE.get_case_data(metrics, verbose=False, label_type=case_label_type, threshold=threshold, normalize=False)
idx = temp_a[temp_a["bpm"] > 200].index 
invalid_case_subjects = set(temp_a["subject"].iloc[idx].tolist())
idx = temp_a[temp_a["bpm"] < 35].index 
invalid_case_subjects.update(set(temp_a["subject"].iloc[idx].tolist()))

temp_a, _ = train.Train_WESAD.get_wesad_data([train.Metrics.BPM], phases=dr_w.Phases.PHASE_ORDER, normalize=False)
idx = temp_a[temp_a["bpm"] > 200].index 
invalid_wesad_subjects = set(temp_a["subject"].iloc[idx].tolist())
idx = temp_a[temp_a["bpm"] < 35].index 
invalid_wesad_subjects.update(set(temp_a["subject"].iloc[idx].tolist()))

## APD/WESAD

In [7]:
# T-TEST
importlib.reload(train)
importlib.reload(dr_a)
importlib.reload(dr_w)
importlib.reload(dt)

from scipy import stats


metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
# ]
] + train.Metrics.STATISTICAL

for metric in metrics:
    x_a, y_a = train.Train_WESAD.get_wesad_data(metrics, model_phases_wesad, verbose=False, label_type=wesad_label_type, threshold=threshold, normalize=True)
    x_b, y_b = train.Train_APD.get_apd_data_ranking([metric], model_phases_apd, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold, normalize=True)
    # drop subjects with noisy data
    x_b = x_b[~x_b["subject"].isin(invalid_apd_subjects)].reset_index(drop=True)
    y_b = y_b[~y_b["subject"].isin(invalid_apd_subjects)].reset_index(drop=True)

    x_a = x_a.drop(["phaseId"], axis=1)
    x_b = x_b.drop(["phaseId"], axis=1)

    inds = pd.isnull(x_a).any(axis=1).to_numpy().nonzero()[0]
    x_a = x_a.drop(labels=inds, axis=0).reset_index(drop=True)
    y_a = y_a.drop(labels=inds, axis=0).reset_index(drop=True)
    inds = pd.isnull(x_b).any(axis=1).to_numpy().nonzero()[0]
    x_b = x_b.drop(labels=inds, axis=0).reset_index(drop=True)
    y_b = y_b.drop(labels=inds, axis=0).reset_index(drop=True)

    positive_class_a = y_a.index[y_a["label"] == 1].tolist()
    positive_class_b = y_b.index[y_b["label"] == 1].tolist()

    x_a = x_a.loc[positive_class_a, metric]
    x_b = x_b.loc[positive_class_b, metric]

    t, p_val = stats.ttest_ind(x_a, x_b, equal_var=False)
    print(f"Metric: {metric}\n\tt-statistic: {t}\n\tp-value: {p_val}")

Metric: bpm
	t-statistic: -0.7273109930677188
	p-value: 0.4686081985650258
Metric: rmssd
	t-statistic: -0.7496856249138365
	p-value: 0.454837865105846
Metric: hf_rr
	t-statistic: 7.714627943632489
	p-value: 1.2283158963185079e-12
Metric: lf_rr
	t-statistic: 7.416843944947361
	p-value: 1.424650476939071e-11
Metric: sdnn
	t-statistic: 0.7713888658538025
	p-value: 0.44205281809067
Metric: mean_SCL
	t-statistic: 0.21744329101957252
	p-value: 0.8281594335093369
Metric: SCR_rate
	t-statistic: 7.16375393019872
	p-value: 5.6576086306701726e-11
Metric: ecg_iqr
	t-statistic: -5.852861043159752
	p-value: 3.613384666308438e-08
Metric: ecg_kurtosis
	t-statistic: 16.558787697143607
	p-value: 6.478964720723567e-31
Metric: ecg_mean
	t-statistic: -36.732326215962246
	p-value: 4.8775309468158467e-60
Metric: ecg_median
	t-statistic: -36.12235325007599
	p-value: 2.5553223875086527e-59
Metric: ecg_rms
	t-statistic: -35.60797283635442
	p-value: 7.612145999633705e-59
Metric: ecg_skew
	t-statistic: 3.76137769

## APD/CASE

In [8]:
# T-TEST
importlib.reload(train)
importlib.reload(dr_a)
importlib.reload(dr_c)
importlib.reload(dt)

from scipy import stats


metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
# ]
] + train.Metrics.STATISTICAL

for metric in metrics:
    x_a, y_a = train.Train_CASE.get_case_data([metric], verbose=False, label_type=case_label_type, threshold=threshold, normalize=True)
    x_b, y_b = train.Train_APD.get_apd_data_ranking([metric], model_phases_apd, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold, normalize=True)
    # drop subjects with noisy data
    x_b = x_b[~x_b["subject"].isin(invalid_apd_subjects)].reset_index(drop=True)
    y_b = y_b[~y_b["subject"].isin(invalid_apd_subjects)].reset_index(drop=True)

    x_a = x_a.drop(["phaseId"], axis=1)
    x_b = x_b.drop(["phaseId"], axis=1)

    inds = pd.isnull(x_a).any(axis=1).to_numpy().nonzero()[0]
    x_a = x_a.drop(labels=inds, axis=0).reset_index(drop=True)
    y_a = y_a.drop(labels=inds, axis=0).reset_index(drop=True)
    inds = pd.isnull(x_b).any(axis=1).to_numpy().nonzero()[0]
    x_b = x_b.drop(labels=inds, axis=0).reset_index(drop=True)
    y_b = y_b.drop(labels=inds, axis=0).reset_index(drop=True)

    positive_class_a = y_a.index[y_a["label"] == 1].tolist()
    positive_class_b = y_b.index[y_b["label"] == 1].tolist()

    x_a = x_a.loc[positive_class_a, metric]
    x_b = x_b.loc[positive_class_b, metric]


    t, p_val = stats.ttest_ind(x_a, x_b, equal_var=False)
    print(f"Metric: {metric}\n\tt-statistic: {t}\n\tp-value: {p_val}")

Metric: bpm
	t-statistic: -5.766691291262254
	p-value: 5.848069871612389e-08
Metric: rmssd
	t-statistic: 7.268413222617862
	p-value: 2.2753801986451757e-11
Metric: hf_rr
	t-statistic: 1.9857601054105556
	p-value: 0.04892725579892218
Metric: lf_rr
	t-statistic: 0.38728347416661013
	p-value: 0.6989864372682058
Metric: sdnn
	t-statistic: 6.559741820535658
	p-value: 8.447272115651549e-10
Metric: mean_SCL
	t-statistic: 3.4750423036283467
	p-value: 0.0006639166948337674
Metric: SCR_rate
	t-statistic: 0.9912503927581008
	p-value: 0.3230718749685637
Metric: ecg_iqr
	t-statistic: 0.40059771514739106
	p-value: 0.689277316453235
Metric: ecg_kurtosis
	t-statistic: 13.205573494293262
	p-value: 2.827821518151628e-26
Metric: ecg_mean
	t-statistic: -23.41888673322624
	p-value: 3.810909246049233e-48
Metric: ecg_median
	t-statistic: -22.921335631520982
	p-value: 3.7336941294462676e-47
Metric: ecg_rms
	t-statistic: -22.559385587734614
	p-value: 1.9380350045575203e-46
Metric: ecg_skew
	t-statistic: 9.3336

## CASE/WESAD

In [9]:
# T-TEST
importlib.reload(train)
importlib.reload(dr_c)
importlib.reload(dr_w)
importlib.reload(dt)

from scipy import stats


metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
# ]
] + train.Metrics.STATISTICAL

for metric in metrics:
    x_a, y_a = train.Train_CASE.get_case_data([metric], verbose=False, label_type=case_label_type, threshold=threshold, normalize=True)
    x_b, y_b = train.Train_WESAD.get_wesad_data(metrics, model_phases_wesad, verbose=False, label_type=wesad_label_type, threshold=threshold, normalize=True)

    x_a = x_a.drop(["phaseId"], axis=1)
    x_b = x_b.drop(["phaseId"], axis=1)

    inds = pd.isnull(x_a).any(axis=1).to_numpy().nonzero()[0]
    x_a = x_a.drop(labels=inds, axis=0).reset_index(drop=True)
    y_a = y_a.drop(labels=inds, axis=0).reset_index(drop=True)
    inds = pd.isnull(x_b).any(axis=1).to_numpy().nonzero()[0]
    x_b = x_b.drop(labels=inds, axis=0).reset_index(drop=True)
    y_b = y_b.drop(labels=inds, axis=0).reset_index(drop=True)

    positive_class_a = y_a.index[y_a["label"] == 1].tolist()
    positive_class_b = y_b.index[y_b["label"] == 1].tolist()

    x_a = x_a.loc[positive_class_a, metric]
    x_b = x_b.loc[positive_class_b, metric]

    t, p_val = stats.ttest_ind(x_a, x_b, equal_var=False)
    print(f"Metric: {metric}\n\tt-statistic: {t}\n\tp-value: {p_val}")

Metric: bpm
	t-statistic: -4.0777718854844185
	p-value: 6.48066239068574e-05
Metric: rmssd
	t-statistic: 6.430688571582433
	p-value: 8.62140733942924e-10
Metric: hf_rr
	t-statistic: -2.3791723574978043
	p-value: 0.01837132071556109
Metric: lf_rr
	t-statistic: -5.852150803910497
	p-value: 1.97151179856048e-08
Metric: sdnn
	t-statistic: 4.048562080353125
	p-value: 7.369134721474086e-05
Metric: mean_SCL
	t-statistic: 2.8161005798870433
	p-value: 0.00533809528511482
Metric: SCR_rate
	t-statistic: -4.83209185956067
	p-value: 2.644878562223512e-06
Metric: ecg_iqr
	t-statistic: 4.2555181271346445
	p-value: 3.117952483360366e-05
Metric: ecg_kurtosis
	t-statistic: -3.2792272654617114
	p-value: 0.0012209540169458237
Metric: ecg_mean
	t-statistic: 4.514719269018679
	p-value: 1.0448016388558603e-05
Metric: ecg_median
	t-statistic: 4.424979124209576
	p-value: 1.5341274958634285e-05
Metric: ecg_rms
	t-statistic: 3.9048798355287717
	p-value: 0.0001264644018852916
Metric: ecg_skew
	t-statistic: 5.5553