In [7]:
# IMPORTING MODULES
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
cvx_path = os.path.abspath(os.path.join('..', '..', 'cvxEDA', 'src'))
module_path = os.path.abspath(os.path.join('..', '..', 'src'))
import pandas as pd
import random
import scipy.signal as ss
import shap
import sys
sys.path.append(module_path)

import tools.data_reader_case as dr
import tools.display_tools as dt
import tools.preprocessing as preprocessing
import train

import lightgbm as lgb
from lightgbm import LGBMClassifier
from scipy.fft import fft, fftfreq, fftshift
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, \
    mean_absolute_error, mean_squared_error, log_loss
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from xgboost import XGBClassifier

import cvxopt.solvers
cvxopt.solvers.options['show_progress'] = False

import warnings
warnings.filterwarnings(
    "ignore", 
    category=RuntimeWarning
)


phases = dr.CLIPS


In [11]:
models = {
    "LGB": LGBMClassifier(),
    "RF": RandomForestClassifier(random_state=16),
    "XGB": XGBClassifier(random_state=16),
    # "random": None
}

parameters = {
    "LGB": [{
        "objective": ["binary"],
        "num_leaves": [10, 20, 30, 40, 50],
        "max_depth": [3, 4, 5, 6, 7],
        "metric": ["mean_absolute_error", "mean_squared_error", "binary_logloss"]
    }],
    "RF": [{
        "n_estimators": [10, 20, 30, 40, 50],
        "max_features": ["sqrt", "0.4"],
        "min_samples_split": [3, 4, 5, 6, 7],
        "random_state": [16]
    }],
    "XGB": [{
        "objective": ["binary:logistic"],
        "learning_rate": [0.01, 0.1, 0.3, 0.5],
        "max_depth": [4, 5, 6, 7],
        "n_estimators": [10, 20, 30, 40, 50],
        "eval_metric": ["error"],
        "use_label_encoder": [False],
        "random_state": [16]
    }],
    # "random": None
}

metrics = [
    train.Metrics.BPM, 
    train.Metrics.RMSSD, 
    train.Metrics.HF_RR, 
    train.Metrics.LF_RR, 
    train.Metrics.SDNN, 
    train.Metrics.MEAN_SCL, 
    train.Metrics.SCR_RATE, 
]
# ] + train.Metrics.STATISTICAL

In [12]:
# K-FOLD CROSS-VALIDATION FOR HYPERPARAMETER SELECTION
importlib.reload(train)
importlib.reload(dr)
importlib.reload(dt)
importlib.reload(preprocessing)

model_phases = dr.CLIPS

label_type = dr.SelfReports.AROUSAL
threshold = "fixed"

x, y = train.Train_CASE.get_case_data(
    metrics, verbose=False, label_type=label_type, threshold=threshold, 
    normalize=True, combine_phases=False
)
x = x.drop(["phaseId"], axis=1)
inds = pd.isnull(x).any(axis=1).to_numpy().nonzero()[0]
x = x.drop(labels=inds, axis=0).reset_index(drop=True)
y = y.drop(labels=inds, axis=0).reset_index(drop=True)

acc_results = {
    "LGB": [],
    "RF": [],
    "XGB": [],
    # "random": []
}
reports = {
    "LGB": [],
    "RF": [],
    "XGB": [],
    # "random": []
}
best_models = {}

num_iters = 5
get_importance = True
for _ in range(num_iters):
    # HYPERPARAMETER TUNING
    model_data = train.grid_search_cv(
        models, parameters, x, y, by_subject=True, save_metrics=True, is_resample=True,
        get_importance=get_importance, drop_subject=True, test_size=0.1, folds=5
    )

    for model_name in models.keys():
        best_models[model_name] = model_data[model_name]["best_model"]
        print(f"{model_name}: {model_data[model_name]['best_params']}")

    # FEATURE SELECTION
    x_train, y_train = model_data["train"]
    # features = {name: metrics for name in models.keys()}
    features = train.feature_selection(best_models, model_data["cv"], x_train, y_train, n_features=5)

    # TEST USING OPTIMIZED MODELS AND FEATAURES
    x_test, y_test = model_data["test"]
    out = train.train_test_model(best_models, features, x_train, y_train, x_test, y_test)

    for model_name in acc_results:
        acc_results[model_name].append(out[model_name]["performance"][0])
        reports[model_name].append(out[model_name]["performance"][1])
        if get_importance:
            try:
                print("")
                feature_imp = list(zip(metrics + ["lf_hf_ratio"], out[model_name]["performance"][2]))
                feature_imp = sorted(feature_imp, key=lambda x: x[1], reverse=True)
                print(feature_imp)
            except Exception as e:
                print(out[model_name]["performance"][2])
            print("")

for model_name in acc_results.keys():
    print(f"Model evaluation metrics for {model_name}:")
    for i in range(len(reports[model_name])):
        report = reports[model_name][i]
        acc = acc_results[model_name][i]
        p = report["precision"]
        r = report["recall"]
        f1 = report["f1"]
        auc = report["auc"]
        print(f"\tAccuracy: {acc}\n\tPrecision: {p}\n\tRecall: {r}\n\tF1-score: {f1}\n\tAUC score: {auc}\n" + "-"*40)
    print(f"Mean acc: {np.mean([acc_results[model_name][i] for i in range(len(reports[model_name]))])}")
    print(f"Mean F1-score: {np.mean([reports[model_name][i]['f1'] for i in range(len(reports[model_name]))])}")
    print(f"Mean AUC score: {np.mean([reports[model_name][i]['auc'] for i in range(len(reports[model_name]))])}")
    print("\n")

Grid search for LGB ...
Grid search for RF ...


One or more of the test scores are non-finite: [0.7249092  0.74578424 0.74899647 0.75567572 0.75603503 0.72753741
 0.74783259 0.75359943 0.75738106 0.76361543 0.74995026 0.75825233
 0.75253823 0.75870998 0.76399647 0.76136835 0.76920121 0.77131329
 0.7721111  0.774135   0.78288532 0.78019337 0.77911049 0.78044231
 0.77992282        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan]


Grid search for XGB ...


One or more of the test scores are non-finite: [0.74906956 0.75802499 0.75886126 0.7634001  0.75876323 0.73969426
 0.74539246 0.75415655 0.75483139 0.75326045 0.7486331  0.75501829
 0.75953143 0.75803995 0.75942778 0.74273407 0.74600655 0.74976686
 0.74467035 0.7454649  0.76450027 0.76280522 0.7656597  0.76665535
 0.76411348 0.75402948 0.7651602  0.76954192 0.76892948 0.77711946
 0.76388097 0.7611682  0.76323049 0.75510342 0.76046501 0.75011756
 0.75003342 0.74687519 0.7488814  0.74858946 0.77808232 0.76226665
 0.75878156 0.75634483 0.75832928 0.74293073 0.74567275 0.7406501
 0.7387028  0.73997313 0.76764253 0.7587304  0.74685036 0.75064334
 0.74658937 0.76895165 0.75653372 0.75666016 0.74996243 0.74466324
 0.75487986 0.76484306 0.7552924  0.75435734 0.75344521 0.76547408
 0.75004274 0.75043198 0.74667846 0.73931714 0.75098237 0.73239291
 0.73459218 0.72920792 0.72866627 0.73533549 0.73619387 0.73418549
 0.73332326 0.73382108        nan        nan        nan        nan
        nan     

LGB: {'max_depth': 3, 'metric': 'mean_absolute_error', 'num_leaves': 10, 'objective': 'binary'}
RF: {'max_features': 'sqrt', 'min_samples_split': 7, 'n_estimators': 10, 'random_state': 16}
XGB: {'eval_metric': 'error', 'learning_rate': 0.3, 'max_depth': 4, 'n_estimators': 10, 'objective': 'binary:logistic', 'random_state': 16, 'use_label_encoder': False}
Feature selection for LGB ...
Feature selection for RF ...
Feature selection for XGB ...
Training LGB ...
Model LGB, Actual: [0 1], [27 18], Predictions: [0 1], [32 13]
Training RF ...
Model RF, Actual: [0 1], [27 18], Predictions: [0 1], [30 15]
Training XGB ...
Model XGB, Actual: [0 1], [27 18], Predictions: [0 1], [35 10]

[('hf_rr', 121), ('sdnn', 113), ('rmssd', 107), ('bpm', 95), ('lf_rr', 65)]


[('rmssd', 0.42503531182733906), ('sdnn', 0.191277745136269), ('hf_rr', 0.17677670025511252), ('bpm', 0.11158721568106983), ('lf_rr', 0.09532302710020973)]


[('hf_rr', 0.47767732), ('sdnn', 0.16263756), ('bpm', 0.13289185), ('lf_rr', 0.

One or more of the test scores are non-finite: [0.66512425 0.67395732 0.68767733 0.6952583  0.69705188 0.68270523
 0.69467449 0.70286955 0.70704947 0.706925   0.71302404 0.7195649
 0.70927126 0.70953425 0.7070355  0.7099518  0.72331291 0.71823354
 0.71779897 0.71699097 0.69654872 0.73280932 0.73076232 0.73438064
 0.73039194        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan]


Grid search for XGB ...


One or more of the test scores are non-finite: [0.69534247 0.70217729 0.69244185 0.70205301 0.70121048 0.66649737
 0.68447017 0.6851981  0.69632917 0.68962439 0.66292608 0.66357675
 0.67687493 0.67678072 0.67885886 0.65194975 0.65500678 0.66421182
 0.66356608 0.67767175 0.70242264 0.70540635 0.71548775 0.72277605
 0.72233968 0.69066526 0.70913352 0.7239225  0.72765583 0.72074399
 0.69656417 0.69465414 0.71193791 0.70779248 0.71185991 0.69666506
 0.69820024 0.70814895 0.70983906 0.71040686 0.71048463 0.71704401
 0.71349515 0.70242794 0.70157691 0.71523034 0.7128189  0.7122263
 0.70540594 0.71007795 0.68715397 0.70311987 0.70066133 0.69047165
 0.68846945 0.69927432 0.71501877 0.70489796 0.69982234 0.69804437
 0.71611959 0.69997491 0.68788722 0.686793   0.67783147 0.69784848
 0.69138498 0.6936228  0.70026434 0.69798974 0.6906845  0.68844511
 0.68460464 0.68144174 0.67798976 0.69848588 0.69847208 0.69725641
 0.6934777  0.69410669        nan        nan        nan        nan
        nan     

LGB: {'max_depth': 3, 'metric': 'mean_absolute_error', 'num_leaves': 10, 'objective': 'binary'}
RF: {'max_features': 'sqrt', 'min_samples_split': 7, 'n_estimators': 40, 'random_state': 16}
XGB: {'eval_metric': 'error', 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 40, 'objective': 'binary:logistic', 'random_state': 16, 'use_label_encoder': False}
Feature selection for LGB ...
Feature selection for RF ...
Feature selection for XGB ...
Training LGB ...
Model LGB, Actual: [0 1], [39  6], Predictions: [0 1], [35 10]
Training RF ...
Model RF, Actual: [0 1], [39  6], Predictions: [0 1], [37  8]
Training XGB ...
Model XGB, Actual: [0 1], [39  6], Predictions: [0 1], [39  6]

[('rmssd', 129), ('hf_rr', 105), ('lf_rr', 101), ('bpm', 97), ('sdnn', 70)]


[('hf_rr', 0.34559163536461923), ('rmssd', 0.27425461275593355), ('bpm', 0.13833756927440535), ('lf_rr', 0.1380353091198421), ('sdnn', 0.10378087348519986)]


[('rmssd', 0.43733165), ('bpm', 0.15840259), ('lf_rr', 0.14799021), ('hf_rr', 

One or more of the test scores are non-finite: [0.69361477 0.68572497 0.68247676 0.67705626 0.68461515 0.69329804
 0.67768325 0.68065341 0.68384369 0.68249814 0.65283932 0.65432547
 0.65508376 0.65994866 0.66867593 0.68393576 0.68365363 0.67412959
 0.67550596 0.67848075 0.69274707 0.69032933 0.68251321 0.68610438
 0.68885826        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan]


Grid search for XGB ...


One or more of the test scores are non-finite: [0.67674521 0.68762779 0.67906923 0.68013033 0.66951747 0.6742034
 0.68504756 0.69040362 0.68500109 0.69193175 0.68139909 0.68236643
 0.6916422  0.69757682 0.70661112 0.68247689 0.68091493 0.6895842
 0.69491039 0.69235278 0.67777942 0.68510602 0.68272208 0.70141974
 0.69510678 0.68760828 0.69878664 0.70083814 0.70909481 0.70698901
 0.68293837 0.69717479 0.69746317 0.70353523 0.70571591 0.6832893
 0.68577422 0.68043194 0.68429323 0.68995608 0.69445037 0.68875046
 0.69740688 0.68792256 0.67763393 0.68566836 0.69278965 0.68122855
 0.68373745 0.67992041 0.70154033 0.69378249 0.6897434  0.67871692
 0.68205407 0.67605076 0.66425364 0.66056457 0.6635773  0.66213536
 0.70265797 0.68020486 0.67816653 0.67995861 0.67673859 0.67291575
 0.67036019 0.66756976 0.67333637 0.67649158 0.70951765 0.69019997
 0.6898497  0.68904375 0.68466586 0.69577995 0.67925832 0.6837932
 0.68209215 0.68024736        nan        nan        nan        nan
        nan        

LGB: {'max_depth': 3, 'metric': 'mean_absolute_error', 'num_leaves': 10, 'objective': 'binary'}
RF: {'max_features': 'sqrt', 'min_samples_split': 3, 'n_estimators': 10, 'random_state': 16}
XGB: {'eval_metric': 'error', 'learning_rate': 0.5, 'max_depth': 6, 'n_estimators': 10, 'objective': 'binary:logistic', 'random_state': 16, 'use_label_encoder': False}
Feature selection for LGB ...
Feature selection for RF ...
Feature selection for XGB ...
Training LGB ...
Model LGB, Actual: [0 1], [27 18], Predictions: [0 1], [32 13]
Training RF ...
Model RF, Actual: [0 1], [27 18], Predictions: [0 1], [32 13]
Training XGB ...
Model XGB, Actual: [0 1], [27 18], Predictions: [0 1], [31 14]

[('rmssd', 123), ('sdnn', 109), ('hf_rr', 102), ('bpm', 58), ('lf_rr', 57)]


[('rmssd', 0.42779556879844566), ('bpm', 0.17124465259832183), ('hf_rr', 0.15038505804309849), ('sdnn', 0.1423444042258386), ('lf_rr', 0.10823031633429545)]


[('rmssd', 0.4035095), ('hf_rr', 0.18322045), ('bpm', 0.16110988), ('lf_rr', 0

One or more of the test scores are non-finite: [0.66102503 0.69343239 0.69558834 0.6984199  0.69930268 0.69851652
 0.70921863 0.71915386 0.71807261 0.71407288 0.68182337 0.70953469
 0.71894081 0.71654785 0.71675492 0.70536714 0.72377547 0.71906716
 0.72005405 0.71827016 0.68624304 0.71274349 0.70899592 0.70721008
 0.7099098         nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan]


Grid search for XGB ...


One or more of the test scores are non-finite: [0.70040156 0.71191525 0.71707186 0.71521208 0.71633431 0.70638149
 0.70588579 0.72217504 0.72325372 0.71259153 0.68174025 0.68034778
 0.68101192 0.69884215 0.72785385 0.69354351 0.67825213 0.68630547
 0.69411833 0.71022586 0.7298445  0.71283546 0.72686802 0.72394493
 0.72161519 0.73520964 0.72789921 0.73666312 0.7276866  0.73160402
 0.72688109 0.71540218 0.72198201 0.71853048 0.71687994 0.7214724
 0.7048822  0.71021336 0.71997262 0.71940519 0.71288836 0.71284401
 0.70319245 0.69909871 0.69676059 0.71987834 0.70298565 0.70725926
 0.70807159 0.70377795 0.70990976 0.71992141 0.72277733 0.71066321
 0.70970397 0.70924659 0.71329931 0.7086796  0.70972993 0.71213415
 0.69419015 0.69234033 0.68924942 0.69679169 0.68626644 0.7357749
 0.7237589  0.72750478 0.72205352 0.72054448 0.70148316 0.71129107
 0.70945251 0.70455662 0.70283032 0.71760898 0.70580064 0.69623436
 0.69638596 0.70000097        nan        nan        nan        nan
        nan      

LGB: {'max_depth': 4, 'metric': 'mean_absolute_error', 'num_leaves': 10, 'objective': 'binary'}
RF: {'max_features': 'sqrt', 'min_samples_split': 6, 'n_estimators': 20, 'random_state': 16}
XGB: {'eval_metric': 'error', 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 30, 'objective': 'binary:logistic', 'random_state': 16, 'use_label_encoder': False}
Feature selection for LGB ...
Feature selection for RF ...
Feature selection for XGB ...
Training LGB ...
Model LGB, Actual: [0 1], [31 14], Predictions: [0 1], [33 12]
Training RF ...
Model RF, Actual: [0 1], [31 14], Predictions: [0 1], [35 10]
Training XGB ...
Model XGB, Actual: [0 1], [31 14], Predictions: [0 1], [37  8]

[('hf_rr', 150), ('sdnn', 143), ('lf_rr', 113), ('rmssd', 107), ('bpm', 95)]


[('hf_rr', 0.39851332531205413), ('bpm', 0.17752936795516083), ('rmssd', 0.16591307750462442), ('sdnn', 0.16464352051497444), ('lf_rr', 0.093400708713186)]


[('hf_rr', 0.4558412), ('bpm', 0.15895627), ('lf_rr', 0.15240009), ('rmssd', 0

One or more of the test scores are non-finite: [0.68325602 0.67750144 0.67552772 0.67069765 0.67725036 0.65400768
 0.68535634 0.695198   0.67893949 0.67775372 0.69392295 0.71329846
 0.71044359 0.69878719 0.69440396 0.68412297 0.70278376 0.70222443
 0.69844317 0.70443949 0.70530524 0.71133547 0.7111092  0.70306209
 0.69730656        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan]


Grid search for XGB ...


One or more of the test scores are non-finite: [0.71995156 0.73229398 0.72354069 0.7147598  0.70732932 0.70958588
 0.71541228 0.71939172 0.70594552 0.6980065  0.71763081 0.71358079
 0.71547047 0.70435333 0.69772034 0.70964953 0.71481607 0.71168729
 0.69635459 0.70004297 0.71459939 0.71163217 0.70105673 0.70165094
 0.70159428 0.71500618 0.6929797  0.6947896  0.69488382 0.69125917
 0.71038332 0.69476089 0.70327126 0.7063628  0.69987312 0.70848925
 0.70124575 0.70259564 0.69840739 0.69743641 0.70911371 0.70560758
 0.70276094 0.70041044 0.6936279  0.71270971 0.70224275 0.69246316
 0.68651536 0.68767974 0.71801216 0.70203469 0.69196671 0.69717042
 0.69842851 0.69835939 0.68784347 0.68454108 0.6799759  0.67861519
 0.68795186 0.68373473 0.68163211 0.678564   0.67643082 0.71406279
 0.70434524 0.6935188  0.6900789  0.69105649 0.68551829 0.68350875
 0.67341849 0.67734718 0.67386832 0.70930117 0.69709569 0.69011848
 0.68860869 0.68895768        nan        nan        nan        nan
        nan    

LGB: {'max_depth': 3, 'metric': 'mean_absolute_error', 'num_leaves': 10, 'objective': 'binary'}
RF: {'max_features': 'sqrt', 'min_samples_split': 5, 'n_estimators': 20, 'random_state': 16}
XGB: {'eval_metric': 'error', 'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 20, 'objective': 'binary:logistic', 'random_state': 16, 'use_label_encoder': False}
Feature selection for LGB ...
Feature selection for RF ...
Feature selection for XGB ...
Training LGB ...
Model LGB, Actual: [0 1], [29 16], Predictions: [0 1], [35 10]
Training RF ...
Model RF, Actual: [0 1], [29 16], Predictions: [0 1], [34 11]
Training XGB ...
Model XGB, Actual: [0 1], [29 16], Predictions: [0 1], [38  7]

[('rmssd', 114), ('hf_rr', 113), ('sdnn', 104), ('bpm', 95), ('lf_rr', 54)]


[('lf_rr', 0.2921005097831796), ('hf_rr', 0.25559165050002675), ('sdnn', 0.1655418720741512), ('bpm', 0.14756349836127242), ('rmssd', 0.13920246928136995)]


[('hf_rr', 0.73079675), ('lf_rr', 0.09917789), ('rmssd', 0.06715129), ('bpm', 