In [4]:
# IMPORTING MODULES
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import sys
sys.path.append(module_path)

import tools.data_reader_apd as dr_a
import tools.data_reader_wesad as dr_w
import tools.data_reader_popane as dr_p
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

import warnings
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)

In [29]:
metrics_list = [
    [ # ECG time
        train.Metrics.BPM, 
        train.Metrics.IBI, 
        train.Metrics.SDNN, 
        train.Metrics.RMSSD, 
    ],
    [ # ECG frequency
        train.Metrics.HF_RR, 
        train.Metrics.LF_RR
    ],
    [ # EDA metrics
        train.Metrics.MEAN_SCL, 
        train.Metrics.SCR_RATE
    ],
    [ # all
        train.Metrics.BPM, 
        train.Metrics.IBI, 
        train.Metrics.SDNN, 
        train.Metrics.RMSSD, 
        train.Metrics.HF_RR,
        train.Metrics.LF_RR,
        train.Metrics.MEAN_SCL, 
        train.Metrics.SCR_RATE
    ]
]

single_metrics = [
    train.Metrics.BPM, 
    train.Metrics.IBI, 
    train.Metrics.SDNN, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR,
    train.Metrics.LF_RR,
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
]

threshold = "fixed"

In [None]:
# LOAD TRAIN AND TEST DATA
importlib.reload(train)
importlib.reload(dr_a)
importlib.reload(dt)


model_phases = [
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Exposure", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Exposure", "Speech_Break"
    ],
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Break"
    ],
    [
        "Baseline_Rest", 
        "BugBox_Relax", "BugBox_Anticipate", 
        "Speech_Relax", "Speech_Anticipate"
    ],
    [
        "Baseline_Rest", 
        "BugBox_Relax",
        "Speech_Relax"
    ],
    ["BugBox_Break", "Speech_Break"],
    ["BugBox_Exposure", "Speech_Exposure"]
]

anxiety_label_type = "Anxiety"

for phases in model_phases:
    print(f"PHASES: {phases} " + "-"*30)
    for metrics in metrics_list:
        print(f"METRICS: {metrics}")
        x, y = train.Train_APD.get_apd_data_ranking(metrics, phases, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold)
        x = x.drop(["phaseId"], axis=1)
        # drop subjects with noisy data
        x = x[x['subject'] != 84.0]
        y = y[y['subject'] != 84.0]
        # x = x[x['subject'] != 8.0]
        # y = y[y['subject'] != 8.0]

        x.drop(labels=["anxietyGroup"], axis=1)

        # 0-1 scaling
        for i in range(3, len(x.columns)):
            data_col = x[x.columns[i]]
            data_col = (data_col - data_col.min())/(data_col.max() - data_col.min())
            x[x.columns[i]] = data_col

        model = LinearRegression()
        model.fit(x, y)
        # print(model.intercept_) 
        # print(model.coef_)
        print(f"R2: {model.score(x, y)}\n")

print("\n")

for phases in model_phases:
    print(f"PHASES: {phases} " + "-"*30)
    for metric in single_metrics:
        print(f"METRICS: {metric}")
        x, y = train.Train_APD.get_apd_data_ranking(metrics, phases, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold)
        x = x.drop(["phaseId"], axis=1)
        # drop subjects with noisy data
        x = x[x['subject'] != 84.0]
        y = y[y['subject'] != 84.0]
        # x = x[x['subject'] != 8.0]
        # y = y[y['subject'] != 8.0]

        x.drop(labels=["anxietyGroup"], axis=1)

        # 0-1 scaling
        for i in range(3, len(x.columns)):
            data_col = x[x.columns[i]]
            data_col = (data_col - data_col.min())/(data_col.max() - data_col.min())
            x[x.columns[i]] = data_col

        model = LinearRegression()
        model.fit(x, y)
        # print(model.intercept_) 
        # print(model.coef_)
        print(f"R2: {model.score(x, y)}\n")

In [None]:
# LOAD TRAIN AND TEST DATA
importlib.reload(train)
importlib.reload(dr_w)
importlib.reload(dt)

model_phases = [
    [
        dr_w.Phases.BASE,
        dr_w.Phases.FUN,
        dr_w.Phases.TSST,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    [
        dr_w.Phases.BASE,
        dr_w.Phases.TSST,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    [
        dr_w.Phases.BASE,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    [
        dr_w.Phases.BASE,
    ]
]

label_type = "stai"

for phases in model_phases:
    print(f"PHASES: {phases} " + "-"*30)
    for metrics in metrics_list:
        print(f"METRICS: {metrics}")
        x, y = train.Train_WESAD.get_wesad_data(metrics, phases, verbose=False, label_type=label_type, threshold=threshold)
        x = x.drop(["phaseId"], axis=1)

        # 0-1 scaling
        for i in range(3, len(x.columns)):
            data_col = x[x.columns[i]]
            data_col = (data_col - data_col.min())/(data_col.max() - data_col.min())
            x[x.columns[i]] = data_col

        model = LinearRegression()
        model.fit(x, y)
        # print(model.intercept_) 
        # print(model.coef_)
        print(f"R2: {model.score(x, y)}\n")

print("\n")

for phases in model_phases:
    print(f"PHASES: {phases} " + "-"*30)
    for metric in single_metrics:
        print(f"METRICS: {metric}")
        x, y = train.Train_WESAD.get_wesad_data(metrics, phases, verbose=False, label_type=label_type, threshold=threshold)
        x = x.drop(["phaseId"], axis=1)

        # 0-1 scaling
        for i in range(3, len(x.columns)):
            data_col = x[x.columns[i]]
            data_col = (data_col - data_col.min())/(data_col.max() - data_col.min())
            x[x.columns[i]] = data_col

        model = LinearRegression()
        model.fit(x, y)
        # print(model.intercept_) 
        # print(model.coef_)
        print(f"R2: {model.score(x, y)}\n")

In [49]:
# LOAD TRAIN AND TEST DATA
importlib.reload(train)
importlib.reload(dr_p)
importlib.reload(dt)


popane_phases = {
    "Study1": dr_p.Study1.ALL,
    "Study2": dr_p.Study2.ALL,
    "Study3": dr_p.Study3.ALL,
    "Study4": dr_p.Study4.ALL,
    "Study5": dr_p.Study5.ALL,
    # "Study6": dr_p.Study6.ALL,
    # "Study7": dr_p.Study7.ALL
}

label_type = "affect"

for study in popane_phases.keys():
    print(f"{study} " + "-"*60)
    phases = popane_phases[study]
    for phase in phases:
        for metrics in metrics_list:
            print(f"METRICS: {metrics}")
            x, y = train.Train_POPANE.get_popane_data(study, metrics, [phase], verbose=False, label_type=label_type, threshold=threshold)
            # x = x.drop(["phaseId"], axis=1)
            # nan_idx = x[x.isna().any(axis=1)].index
            # x = x.drop(index=nan_idx)
            # y = y.drop(index=nan_idx)

            # nan_idx = y[y.isna().any(axis=1)].index
            # x = x.drop(index=nan_idx)
            # y = y.drop(index=nan_idx)

            # 0-1 scaling
            for i in range(3, len(x.columns)):
                data_col = x[x.columns[i]]
                data_col = (data_col - data_col.min())/(data_col.max() - data_col.min())
                x[x.columns[i]] = data_col

            model = LinearRegression()

            model.fit(x, y)
            # print(model.intercept_) 
            # print(model.coef_)
            print(f"R2: {model.score(x, y)}\n")

        print("\n")

        for metric in single_metrics:
            print(f"METRICS: {metric}")
            x, y = train.Train_POPANE.get_popane_data(study, metric, [phase], verbose=False, label_type=label_type, threshold=threshold)
            x = x.drop(["phaseId"], axis=1)

            # 0-1 scaling
            for i in range(3, len(x.columns)):
                data_col = x[x.columns[i]]
                data_col = (data_col - data_col.min())/(data_col.max() - data_col.min())
                x[x.columns[i]] = data_col

            model = LinearRegression()
            model.fit(x, y)
            # print(model.intercept_) 
            # print(model.coef_)
            print(f"R2: {model.score(x, y)}\n")

Study1 ------------------------------------------------------------
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd']
R2: 0.5025300268771222

METRICS: ['hf_rr', 'lf_rr']
R2: 0.5077266581692784

METRICS: ['mean_SCL', 'SCR_rate']
R2: 0.5079038075771718

METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd', 'hf_rr', 'lf_rr', 'mean_SCL', 'SCR_rate']


ValueError: Input X contains NaN.
LinearRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [51]:
metrics = [ # all
    train.Metrics.BPM, 
    train.Metrics.IBI, 
    train.Metrics.SDNN, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR,
    train.Metrics.LF_RR,
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE
]

popane_phases = {
    "Study1": dr_p.Study1.ALL,
    "Study2": dr_p.Study2.ALL,
    "Study3": dr_p.Study3.ALL,
    "Study4": dr_p.Study4.ALL,
    "Study5": dr_p.Study5.ALL,
    # "Study6": dr_p.Study6.ALL,
    # "Study7": dr_p.Study7.ALL
}

label_type = "affect"

for study in popane_phases.keys():
    print(f"{study} " + "-"*60)
    phases = popane_phases[study]
    for phase in phases:
        for metrics in metrics_list:
            print(f"METRICS: {metrics}")
            x, y = train.Train_POPANE.get_popane_data(study, metrics, [phase], verbose=False, label_type=label_type, threshold=threshold)
            print(x[x.isna().any(axis=1)].index)

Study1 ------------------------------------------------------------
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd']
Int64Index([], dtype='int64')
METRICS: ['hf_rr', 'lf_rr']
Int64Index([], dtype='int64')
METRICS: ['mean_SCL', 'SCR_rate']
Int64Index([], dtype='int64')
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd', 'hf_rr', 'lf_rr', 'mean_SCL', 'SCR_rate']
Int64Index([138, 139, 140], dtype='int64')
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd']
Int64Index([], dtype='int64')
METRICS: ['hf_rr', 'lf_rr']
Int64Index([], dtype='int64')
METRICS: ['mean_SCL', 'SCR_rate']
Int64Index([], dtype='int64')
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd', 'hf_rr', 'lf_rr', 'mean_SCL', 'SCR_rate']
Int64Index([], dtype='int64')
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd']
Int64Index([], dtype='int64')
METRICS: ['hf_rr', 'lf_rr']
Int64Index([], dtype='int64')
METRICS: ['mean_SCL', 'SCR_rate']
Int64Index([], dtype='int64')
METRICS: ['bpm', 'ibi', 'sdnn', 'rmssd', 'hf_rr', 'lf_rr', 'mean_SCL', 'SCR_rate']
Int64Index([138, 139], dtype='i