In [1]:
# IMPORTING MODULES
# Imports for the ASCERTAIN arousal-classification experiments, plus the small
# amount of path and warning setup the later cells rely on.
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
# Absolute path to the cvxEDA sources two directories up. It is only computed
# here; this cell does not append it to sys.path.
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
# Absolute path to the project's own `src` directory two directories up.
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import shap
import sys
# Make the project packages (`tools`, `train`) importable from this notebook.
sys.path.append(module_path)

# Project-local modules; later cells reload these with importlib.reload.
import tools.data_reader_ascertain as dr
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

import lightgbm as lgb
from lightgbm import LGBMClassifier
from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, \
    mean_absolute_error, mean_squared_error, log_loss
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

# Turn off cvxopt's solver progress output.
import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

# Suppress every RuntimeWarning for the rest of the session.
import warnings
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)

# Alias for the data reader's CLIPS collection.
phases = dr.CLIPS

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Candidate classifiers, keyed by the short names that the hyperparameter grids
# and the result tables use.
# Every estimator receives the same explicit seed so that none of them depends
# on a library default for its random state.
MODEL_SEED = 16
models = {
    "LGB": LGBMClassifier(random_state=MODEL_SEED),
    "RF": RandomForestClassifier(random_state=MODEL_SEED),
    "XGB": XGBClassifier(random_state=MODEL_SEED),
    # "random": None
}

# Hyperparameter search space: one list of grid dicts per model, using the same
# keys as `models`.
# NOTE(review): the LGB "metric" and XGB "eval_metric" entries select evaluation
# metrics; whether they influence the fitted model depends on how
# train.grid_search_cv fits the estimators - confirm they are worth searching.
parameters = {
    "LGB": [dict(
        objective=["binary"],
        num_leaves=[10, 30, 50, 70],
        metric=["mean_absolute_error", "mean_squared_error", "binary_logloss"],
    )],
    "RF": [dict(
        n_estimators=[10, 50, 100],
        max_features=["sqrt"],
        min_samples_split=[3, 5, 7],
    )],
    "XGB": [dict(
        use_label_encoder=[False],
        objective=["binary:logistic"],
        # Mixes the string "error" with the sklearn metric callables imported above.
        eval_metric=["error", mean_absolute_error, mean_squared_error, log_loss],
    )],
    # "random": None
}

# Feature identifiers requested from the data loader. The same list, extended
# with "lf_hf_ratio", is reused later to label feature importances.
metrics = [
    train.Metrics.BPM, train.Metrics.RMSSD, train.Metrics.HF_RR,
    train.Metrics.LF_RR, train.Metrics.SDNN,
    train.Metrics.MEAN_SCL, train.Metrics.SCR_RATE,
]

In [5]:
# K-FOLD CROSS-VALIDATION FOR HYPERPARAMETER SELECTION
# Re-import the project modules so edits made on disk are picked up without
# restarting the kernel.
importlib.reload(train)
importlib.reload(dr)
importlib.reload(dt)
importlib.reload(preprocessing)


# Self-report dimension used as the target, and the threshold mode forwarded to
# the loader.
label_type = dr.SelfReports.AROUSAL
threshold = "fixed"

x, y = train.Train_ASCERTAIN.get_ascertain_data(
    metrics, verbose=False, label_type=label_type, threshold=threshold,
    normalize=True, combine_phases=False
)
x = x.drop(["phaseId"], axis=1)

# Remove every sample that has a missing feature, together with its label.
# Rows are selected by position with a boolean mask. Dropping by label (as
# DataFrame.drop does) is only equivalent when the loader returns a default
# RangeIndex, and would remove the wrong rows for any other index.
has_nan = pd.isnull(x).any(axis=1).to_numpy()
inds = has_nan.nonzero()[0]  # positions of the removed rows
x = x.loc[~has_nan].reset_index(drop=True)
y = y.loc[~has_nan].reset_index(drop=True)

# Per-model accumulators: one entry is appended for every outer iteration.
acc_results = {model_name: [] for model_name in models}
reports = {model_name: [] for model_name in models}
best_models = {}

num_iters = 5
get_importance = True
# Names attached to the importance values printed below.
importance_labels = metrics + ["lf_hf_ratio"]
for _ in range(num_iters):
    # HYPERPARAMETER TUNING
    model_data = train.grid_search_cv(
        models, parameters, x, y, by_subject=True, save_metrics=True, is_resample=True,
        get_importance=get_importance, drop_subject=True, test_size=0.2, folds=5
    )

    for model_name in models:
        best_models[model_name] = model_data[model_name]["best_model"]

    # FEATURE SELECTION
    x_train, y_train = model_data["train"]
    features = train.feature_selection(best_models, model_data["cv"], x_train, y_train, n_features=5)

    # TEST USING OPTIMIZED MODELS AND FEATURES
    x_test, y_test = model_data["test"]
    out = train.train_test_model(best_models, features, x_train, y_train, x_test, y_test)

    for model_name in acc_results:
        # Indexed as (accuracy, report, importance) by the code below.
        performance = out[model_name]["performance"]
        acc_results[model_name].append(performance[0])
        reports[model_name].append(performance[1])
        if get_importance:
            print("")
            try:
                # NOTE(review): names are attached by position and zip() stops at
                # the shorter input. With n_features=5 selected above, the
                # importances may cover only the selected features, in which case
                # these labels would not match - confirm what
                # train.train_test_model returns.
                feature_imp = sorted(
                    zip(importance_labels, performance[2]),
                    key=lambda pair: pair[1],
                    reverse=True,
                )
                print(feature_imp)
            except Exception as err:
                # Best-effort fallback: say why labelling failed, then show the
                # raw importance object instead.
                print(f"Could not label feature importances ({type(err).__name__}: {err})")
                print(performance[2])
            print("")

# Per-model summary: the scores of every iteration, followed by their means.
# The accuracy and report lists are filled together, so they have equal length.
for model_name, model_accs in acc_results.items():
    model_reports = reports[model_name]
    print(f"Model evaluation metrics for {model_name}:")
    for acc, report in zip(model_accs, model_reports):
        p, r, f1, auc = (report[key] for key in ("precision", "recall", "f1", "auc"))
        print(f"\tAccuracy: {acc}\n\tPrecision: {p}\n\tRecall: {r}\n\tF1-score: {f1}\n\tAUC score: {auc}\n" + "-"*40)
    mean_acc = np.mean(list(model_accs))
    mean_f1 = np.mean([report["f1"] for report in model_reports])
    mean_auc = np.mean([report["auc"] for report in model_reports])
    print(f"Mean acc: {mean_acc}")
    print(f"Mean F1-score: {mean_f1}")
    print(f"Mean AUC score: {mean_auc}")
    print("\n")

Model LGB, Actual: [0 1], [156 324], Predictions: [0 1], [ 35 445]
Model RF, Actual: [0 1], [156 324], Predictions: [0 1], [ 44 436]
Model XGB, Actual: [0 1], [156 324], Predictions: [0 1], [ 68 412]

[('bpm', 207), ('sdnn', 196), ('rmssd', 193), ('hf_rr', 158), ('lf_rr', 146)]


[('rmssd', 0.22391881550397308), ('bpm', 0.21869246227902361), ('sdnn', 0.2158301602649736), ('hf_rr', 0.19747787946883857), ('lf_rr', 0.1440806824831912)]


[('lf_rr', 0.21057917), ('hf_rr', 0.20837079), ('rmssd', 0.20194542), ('sdnn', 0.1941247), ('bpm', 0.18497992)]

Model LGB, Actual: [0 1], [201 278], Predictions: [0 1], [ 81 398]
Model RF, Actual: [0 1], [201 278], Predictions: [0 1], [ 58 421]
Model XGB, Actual: [0 1], [201 278], Predictions: [0 1], [ 71 408]

[('sdnn', 809), ('hf_rr', 693), ('bpm', 553), ('rmssd', 518), ('lf_rr', 327)]


[('lf_rr', 0.2417885284848488), ('bpm', 0.2277655671230572), ('hf_rr', 0.20539515401493458), ('rmssd', 0.20172317112860058), ('sdnn', 0.12332757924855896)]


[('hf_rr'

In [None]:
# TRAIN USING ORIGINAL ASCERTAIN FEATURES
import scipy.io as sio


# Load the "ECGFeatures_58" entry of the .mat file. The code below indexes it
# as mat[subject][clip] and reads the feature count from the first element.
file = os.path.join(dr.Paths.DATA_DIR, "ASCERTAIN", "Dt_ECGFeatures.mat")
mat = sio.loadmat(file)["ECGFeatures_58"][0, :]
n_features = mat[0].shape[1]

# One row per (clip, subject) pair, clips in the outer loop: the subject id
# followed by that subject's feature vector for the clip.
x = [
    [int(s), *mat[j][i]]
    for i, _clip in enumerate(dr.CLIPS)
    for j, s in enumerate(dr.SUBJECTS)
]

x = pd.DataFrame(x, columns=["subject"] + list(range(1, n_features + 1)))
# Discard feature columns 1..18.
# NOTE(review): the reason for excluding these columns is not recorded here.
x = x.drop(list(range(1, 19)), axis=1)

label_type = dr.SelfReports.AROUSAL
threshold = "fixed"

# Only the labels are taken from the loader; the features come from the .mat file.
# NOTE(review): this assumes the loader returns labels in the same clip-major,
# subject-minor row order used to build `x` - confirm.
_, y = train.Train_ASCERTAIN.get_ascertain_data(
    metrics, verbose=False, label_type=label_type, threshold=threshold,
    normalize=True, combine_phases=False
)

# `x` and `y` come from different sources, so fail early if they cannot be
# row-aligned.
if len(x) != len(y):
    raise ValueError(
        f"Feature rows ({len(x)}) and label rows ({len(y)}) differ; cannot align samples."
    )

# Remove samples with missing features together with their labels. A positional
# boolean mask is used because `y` is not guaranteed to share `x`'s index labels.
has_nan = pd.isnull(x).any(axis=1).to_numpy()
inds = has_nan.nonzero()[0]  # positions of the removed rows
x = x.loc[~has_nan].reset_index(drop=True)
y = y.loc[~has_nan].reset_index(drop=True)

# Per-model accumulators for this experiment.
# NOTE(review): "random" is not a key of `models`; this relies on
# train.train_predict returning an entry under that name - confirm.
result_keys = ("LGB", "RF", "XGB", "random")
acc_results = {model_name: [] for model_name in result_keys}
reports = {model_name: [] for model_name in result_keys}


def _importance_labels(importance, columns):
    """Return the column labels that line up with `importance`.

    Tries all of `columns` first, then `columns` without "subject". Raises
    ValueError when neither has the same length as `importance`.
    """
    n_values = len(importance)
    if n_values == len(columns):
        return columns
    without_subject = [col for col in columns if col != "subject"]
    if n_values == len(without_subject):
        return without_subject
    raise ValueError(
        f"{n_values} importance values cannot be matched to {len(columns)} feature columns"
    )


num_iters = 1
get_importance = True
# The models in this cell are trained on the columns of `x`, so importances are
# labelled with those columns rather than with the HRV metric names.
feature_columns = list(x.columns)
for _ in range(num_iters):
    out = train.train_predict(
        models, x, y, by_subject=True, save_metrics=True, is_resample=False,
        get_importance=get_importance, drop_subject=False, test_size=0.2, folds=5
    )
    for model_name in acc_results:
        # Each entry is indexed as (accuracy, report, importance) below.
        runs = out[model_name]
        for i in range(len(runs)):
            acc_results[model_name].append(runs[i][0])
            reports[model_name].append(runs[i][1])
        if get_importance:
            print("")
            try:
                # shap.plots.bar(out[model_name][0][2])
                for i in range(len(runs)):
                    importance = runs[i][2]
                    labels = _importance_labels(importance, feature_columns)
                    feature_imp = sorted(
                        zip(labels, importance), key=lambda pair: pair[1], reverse=True
                    )
                    print(feature_imp)
            except Exception as err:
                # Best-effort fallback: say why labelling failed, then show the
                # raw importance object of the first run.
                print(f"Could not label feature importances ({type(err).__name__}: {err})")
                print(runs[0][2])
            print("")

# Per-model summary: the scores of every run, followed by their means.
# The accuracy and report lists are filled together, so they have equal length.
for model_name, model_accs in acc_results.items():
    model_reports = reports[model_name]
    print(f"Model evaluation metrics for {model_name}:")
    for acc, report in zip(model_accs, model_reports):
        p = report["precision"]
        r = report["recall"]
        f1 = report["f1"]
        auc = report["auc"]
        print(f"\tAccuracy: {acc}\n\tPrecision: {p}\n\tRecall: {r}\n\tF1-score: {f1}\n\tAUC score: {auc}\n" + "-"*40)
    print(f"Mean acc: {np.mean(list(model_accs))}")
    print(f"Mean F1-score: {np.mean([report['f1'] for report in model_reports])}")
    print(f"Mean AUC score: {np.mean([report['auc'] for report in model_reports])}")
    # Blank separator after each model, matching the cross-validation report cell.
    print("\n")
