In [1]:
import os
import sys
import pathlib

In [2]:
# Make the parent of the current working directory importable, so that the
# submodule located there can be loaded as a package.
orig_dir = os.getcwd()  # working directory at the time this cell runs
package_dir = os.path.dirname(orig_dir)
# Only append when missing: re-running this cell must not keep adding
# duplicate entries to sys.path.
if package_dir not in sys.path:
    sys.path.append(package_dir)

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score
from stresspred import (
    AudaceDataLoader,
    P5_StressDataLoader,
    get_cv_iterator,
    make_prediction_pipeline,
)


In [6]:
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

# Evaluate every (evaluation strategy, model, feature set) combination on ECG
# features and collect one result row per combination in `all_results`.

seed = 0


def _load_ecg_split(loader, tasks):
    """Fetch the ECG-only prediction split for ``tasks`` from ``loader``.

    Parameters
    ----------
    loader : object exposing ``get_split_pred_df``
        An ``AudaceDataLoader`` or ``P5_StressDataLoader`` instance.
    tasks : list of str
        Task names forwarded as ``selected_tasks``.

    Returns
    -------
    The value returned by ``get_split_pred_df``. This cell indexes it with
    the keys ``"X"``, ``"y"`` and ``"sub"``.
    """
    return loader.get_split_pred_df(
        selected_tasks=tasks,
        selected_signals=["ECG"],
        load_from_file=True,
        save_file=False,
    )


# Load each dataset exactly once. The loader arguments never change between
# loop iterations, so calling the loaders inside the loops only repeats the
# same work.
# NOTE(review): this assumes get_split_pred_df returns the same data on every
# call with these arguments and that downstream code does not mutate it in
# place -- confirm against stresspred.
audace_data = _load_ecg_split(AudaceDataLoader(), ["MENTAL", "CPT"])
p5_data = _load_ecg_split(
    P5_StressDataLoader(), ["MENTAL", "MENTALNOISE", "CPT"]
)

# The "all features" set is every AUDACE column whose name contains "HRV_".
out = audace_data
nk_feats = [col for col in out["X"].columns if "HRV_" in col]

model_labels = ["Logistic Regression", "XGBoost"]
list_models = [LogisticRegression(random_state=seed), XGBClassifier(random_state=seed)]
selected_feature_labels = ["Only MedianNN", "MeanNN + RMSSD", "All Neurokit2 features"]

list_selected_features = [
    ["HRV_MedianNN"],
    ["HRV_MeanNN", "HRV_RMSSD"],
    nk_feats,
]

evaluation_labels = [
    "Cross-val on AUDACE",
    "Cross-val on P5_Stress",
    "Train on AUDACE + test on P5_Stress",
]
# Parallel to `evaluation_labels`: (training split, held-out split).
# A held-out split of None means "cross-validate on the training split".
evaluation_splits = [
    (audace_data, None),
    (p5_data, None),
    (audace_data, p5_data),
]

all_results = []
for eval_label, (train_data, test_data) in zip(evaluation_labels, evaluation_splits):
    X_train = train_data["X"]
    y_train = train_data["y"]
    sub_train = train_data["sub"]

    for model_label, est in zip(model_labels, list_models):
        for feature_label, selected_features in zip(
            selected_feature_labels, list_selected_features
        ):
            pipe_clf = make_prediction_pipeline(est=est)

            if test_data is None:
                # Cross-validation within a single dataset. The iterator is
                # rebuilt for every run because it may be single-use.
                # Only the outer iterator is used here.
                outer_cv, _ = get_cv_iterator(
                    sub_train,
                    n_outer_splits=5,
                    n_inner_splits=4,
                )
                cv_results = cross_validate(
                    pipe_clf, X_train.loc[:, selected_features], y_train, cv=outer_cv
                )
                accuracy = np.mean(cv_results["test_score"])
            else:
                # Cross-dataset evaluation: fit on the whole training split
                # and score on the held-out dataset.
                X_test = test_data["X"]
                y_test = test_data["y"]
                pipe_clf.fit(X_train.loc[:, selected_features], y_train)
                y_pred = pipe_clf.predict(X_test.loc[:, selected_features])
                accuracy = accuracy_score(y_test, y_pred)

            # Accuracy is stored as a percentage string with one decimal.
            accuracy_perc_str = str(np.round(accuracy * 100, 1)) + " %"
            all_results.append(
                {
                    "Signal": "ECG (tr) -> ECG (val)",
                    "Model": model_label,
                    "Feature set": feature_label,
                    "Evaluation strategy": eval_label,
                    "Accuracy": accuracy_perc_str,
                }
            )

In [7]:
# Build one table from the list of per-run result dictionaries collected in
# `all_results` (one row per dictionary, one column per key).
result_df = pd.DataFrame(all_results)

In [8]:
# Print the results table as Markdown text, leaving out the index column.
print(result_df.to_markdown(index=False))

| Signal                | Model               | Feature set            | Evaluation strategy                 | Accuracy   |
|:----------------------|:--------------------|:-----------------------|:------------------------------------|:-----------|
| ECG (tr) -> ECG (val) | Logistic Regression | Only MedianNN          | Cross-val on AUDACE                 | 76.0 %     |
| ECG (tr) -> ECG (val) | Logistic Regression | MeanNN + RMSSD         | Cross-val on AUDACE                 | 78.8 %     |
| ECG (tr) -> ECG (val) | Logistic Regression | All Neurokit2 features | Cross-val on AUDACE                 | 78.2 %     |
| ECG (tr) -> ECG (val) | XGBoost             | Only MedianNN          | Cross-val on AUDACE                 | 67.5 %     |
| ECG (tr) -> ECG (val) | XGBoost             | MeanNN + RMSSD         | Cross-val on AUDACE                 | 73.7 %     |
| ECG (tr) -> ECG (val) | XGBoost             | All Neurokit2 features | Cross-val on AUDACE                 | 75.2 %     |
| ECG (t