In [1]:
# IMPORTING MODULES
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import sys
sys.path.append(module_path)

import tools.data_reader_apd as dr_a
import tools.data_reader_wesad as dr_w
import tools.data_reader_popane as dr_p
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

import warnings
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)

pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.


In [14]:
from itertools import combinations

# Flag APD subjects whose BPM is outside the 35-200 range in any phase; the APD
# regression cell excludes these subjects from its data.
temp_a, _ = train.Train_APD.get_apd_data_ranking([train.Metrics.BPM], phases=dr_a.Phases.PHASES_LIST)
# Select rows with a boolean mask through .loc. Feeding index *labels* into the
# positional .iloc is only correct when the frame carries a default RangeIndex.
implausible_bpm = (temp_a["bpm"] > 200) | (temp_a["bpm"] < 35)
invalid_apd_subjects = set(temp_a.loc[implausible_bpm, "subject"].tolist())

# Metrics that the regression cells combine.
metrics_list = [
    train.Metrics.BPM,
    train.Metrics.RMSSD,
    train.Metrics.IBI,
    train.Metrics.SDNN,
    train.Metrics.HF_RR,
    train.Metrics.LF_RR,
    train.Metrics.MEAN_SCL,
    train.Metrics.SCR_RATE
]

# Map each combination size (1..len(metrics_list)) to every metric subset of
# that size, each subset stored as a list.
metrics_combinations = {
    size: [list(subset) for subset in combinations(metrics_list, size)]
    for size in range(1, len(metrics_list) + 1)
}

# Passed through unchanged as the `threshold` argument of the data loaders.
threshold = "dynamic"

In [47]:
# LOAD TRAIN AND TEST DATA
# Fit a linear regression on APD data for every (phase set, metric combination)
# pair and record the in-sample score, bucketed by the number of metrics used.
importlib.reload(train)
importlib.reload(dr_a)
importlib.reload(dt)
# NOTE(review): metrics_combinations holds train.Metrics members created before
# these reloads -- confirm they are still accepted by the reloaded module.

# Each entry is one set of phases to pool into a single regression.
model_phases = [
    [
        "Baseline_Rest",
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Exposure", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Exposure", "Speech_Break"
    ],
    [
        "Baseline_Rest",
        "BugBox_Relax", "BugBox_Anticipate", "BugBox_Break",
        "Speech_Relax", "Speech_Anticipate", "Speech_Break"
    ],
    [
        "Baseline_Rest",
        "BugBox_Relax", "BugBox_Anticipate",
        "Speech_Relax", "Speech_Anticipate"
    ],
    [
        "Baseline_Rest",
        "BugBox_Relax",
        "Speech_Relax"
    ],
    ["BugBox_Break", "Speech_Break"],
    ["BugBox_Exposure", "Speech_Exposure"]
]

# anxiety_label_type = "Anxiety"
anxiety_label_type = None

# One bucket per combination size; each entry is [score, metrics].
results = {n_metrics: [] for n_metrics in metrics_combinations}

for phases in model_phases:
    for n_metrics, metric_sets in metrics_combinations.items():
        for metrics in metric_sets:
            x, y = train.Train_APD.get_apd_data_ranking(metrics, phases, verbose=False, anxiety_label_type=anxiety_label_type, threshold=threshold)
            x = x.drop(["phaseId"], axis=1)
            # drop subjects with noisy data
            # (.copy() so the scaling below writes to an independent frame)
            x = x[~x["subject"].isin(invalid_apd_subjects)].copy()
            y = y[~y["subject"].isin(invalid_apd_subjects)]

            if anxiety_label_type is not None:
                # DataFrame.drop returns a new frame; the result must be kept.
                x = x.drop(labels=["anxietyGroup"], axis=1)

            # 0-1 scaling of every column from position 3 onwards.
            # The loop variable is deliberately NOT `i`: reusing the
            # combination-size variable here made results[...] below use a
            # column position as its key.
            for col in x.columns[3:]:
                col_values = x[col]
                x[col] = (col_values - col_values.min()) / (col_values.max() - col_values.min())

            model = LinearRegression()
            model.fit(x, y)
            # score() is evaluated on the same data the model was fitted on.
            results[n_metrics].append([model.score(x, y), metrics])



In [None]:
# For each bucket of `results` (in the dict's key order), order its
# [score, metrics] entries from highest score to lowest.
results_apd = [
    sorted(bucket, key=lambda entry: entry[0], reverse=True)
    for bucket in results.values()
]

In [52]:
# LOAD TRAIN AND TEST DATA
# Fit a linear regression on WESAD data for every (phase set, metric
# combination) pair and record the in-sample score, bucketed by the number of
# metrics used.
importlib.reload(train)
importlib.reload(dr_w)
importlib.reload(dt)
# NOTE(review): metrics_combinations holds train.Metrics members created before
# these reloads -- confirm they are still accepted by the reloaded module.

# Each entry is one set of phases to pool into a single regression.
model_phases = [
    [
        dr_w.Phases.BASE,
        dr_w.Phases.FUN,
        dr_w.Phases.TSST,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    [
        dr_w.Phases.BASE,
        dr_w.Phases.TSST,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    [
        dr_w.Phases.BASE,
        dr_w.Phases.MEDI_1,
        dr_w.Phases.MEDI_2
    ],
    [
        dr_w.Phases.BASE,
    ]
]

label_type = "stai"

# One bucket per combination size; each entry is [score, metrics].
results = {n_metrics: [] for n_metrics in metrics_combinations}

for phases in model_phases:
    for n_metrics, metric_sets in metrics_combinations.items():
        for metrics in metric_sets:
            x, y = train.Train_WESAD.get_wesad_data(metrics, phases, verbose=False, label_type=label_type, threshold=threshold)
            x = x.drop(["phaseId"], axis=1)

            # 0-1 scaling of every column from position 3 onwards.
            # The loop variable is deliberately NOT `i`: reusing the
            # combination-size variable here made results[...] below use a
            # column position as its key.
            for col in x.columns[3:]:
                col_values = x[col]
                x[col] = (col_values - col_values.min()) / (col_values.max() - col_values.min())

            model = LinearRegression()
            model.fit(x, y)
            # score() is evaluated on the same data the model was fitted on.
            results[n_metrics].append([model.score(x, y), metrics])


In [None]:
# For each bucket of `results` (in the dict's key order), order its
# [score, metrics] entries from highest score to lowest.
results_wesad = [
    sorted(bucket, key=lambda entry: entry[0], reverse=True)
    for bucket in results.values()
]

In [None]:
# LOAD TRAIN AND TEST DATA
# Fit a linear regression on POPANE data for every (study, phase, metric
# combination) triple and record the in-sample score, bucketed by the number of
# metrics used.
importlib.reload(train)
importlib.reload(dr_p)
importlib.reload(dt)
# NOTE(review): metrics_combinations holds train.Metrics members created before
# these reloads -- confirm they are still accepted by the reloaded module.


# Studies to sweep, mapped to the phases evaluated for each one.
popane_phases = {
    "Study1": dr_p.Study1.ALL,
    "Study2": dr_p.Study2.ALL,
    "Study3": dr_p.Study3.ALL,
    # "Study4": dr_p.Study4.ALL,
    "Study5": dr_p.Study5.ALL,
    # "Study6": dr_p.Study6.ALL,
    # "Study7": dr_p.Study7.ALL
}

label_type = "affect"

# Start from empty buckets. Without this reset the scores below were appended
# to whatever `results` an earlier cell had left in the kernel.
results = {n_metrics: [] for n_metrics in metrics_combinations}

for study, phases in popane_phases.items():
    for phase in phases:
        for n_metrics, metric_sets in metrics_combinations.items():
            for metrics in metric_sets:
                print(f"METRICS: {metrics}")
                x, y = train.Train_POPANE.get_popane_data(study, metrics, [phase], verbose=False, label_type=label_type, threshold=threshold)
                # x = x.drop(["phaseId"], axis=1)

                # Drop rows with a NaN feature from both frames ...
                nan_idx = x[x.isna().any(axis=1)].index
                x = x.drop(index=nan_idx)
                y = y.drop(index=nan_idx)

                # ... then rows with a NaN label, again from both frames.
                nan_idx = y[y.isna().any(axis=1)].index
                x = x.drop(index=nan_idx)
                y = y.drop(index=nan_idx)

                # 0-1 scaling of every column from position 3 onwards.
                # The loop variable is deliberately NOT `i`: reusing the
                # combination-size variable here made results[...] below use a
                # column position as its key.
                for col in x.columns[3:]:
                    col_values = x[col]
                    x[col] = (col_values - col_values.min()) / (col_values.max() - col_values.min())

                model = LinearRegression()

                model.fit(x, y)
                # score() is evaluated on the same data the model was fitted on.
                results[n_metrics].append([model.score(x, y), metrics])

In [None]:
# For each bucket of `results` (in the dict's key order), order its
# [score, metrics] entries from highest score to lowest.
results_popane = [
    sorted(bucket, key=lambda entry: entry[0], reverse=True)
    for bucket in results.values()
]