# PEP Benchmarking - EmpkinS Dataset Both Algorithms

This notebook executes the PEP Benchmarking challenges on the EmpkinS Datasets for PEP Extraction Pipelines using automated algorithms for both Q-wave onset and B-point extraction.

## Setup and Helper Functions

In [1]:
import json
from itertools import product
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from fau_colors import cmaps, register_fausans_font
from IPython.display import Markdown
from tqdm.auto import tqdm

from pepbench.algorithms.ecg import (
    QPeakExtractionForounzafar2018,
    QPeakExtractionMartinez2004Neurokit,
    QPeakExtractionSciPyFindPeaksNeurokit,
    QPeakExtractionVanLien2013,
)
from pepbench.algorithms.heartbeat_segmentation import HeartbeatSegmentationNeurokit
from pepbench.algorithms.icg import (
    BPointExtractionArbol2017IsoelectricCrossings,
    BPointExtractionArbol2017SecondDerivative,
    BPointExtractionArbol2017ThirdDerivative,
    BPointExtractionDebski1993SecondDerivative,
    BPointExtractionDrost2022,
    BPointExtractionForouzanfar2018,
    BPointExtractionLozano2007LinearRegression,
    BPointExtractionLozano2007QuadraticRegression,
    BPointExtractionSherwood1990,
    BPointExtractionStern1985,
)
from pepbench.algorithms.outlier_correction import (
    OutlierCorrectionDummy,
    OutlierCorrectionForouzanfar2018,
    OutlierCorrectionLinearInterpolation,
)
from pepbench.datasets import EmpkinsDataset
from pepbench.evaluation import PepEvaluationChallenge
from pepbench.pipelines import PepExtractionPipeline

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
register_fausans_font()
plt.close("all")

palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["mathtext.default"] = "regular"
plt.rcParams["font.family"] = "sans-serif"
plt.rcParams["font.sans-serif"] = "FAUSans Office"

palette

In [3]:
root_path = Path("../../")

In [4]:
deploy_type = "local"

config_dict = json.load(root_path.joinpath("config.json").open(encoding="utf-8"))

empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
print(empkins_base_path)

/Users/richer/Documents/PhD/Projects/EmpkinS/HealthPsychology_D03/Data/2024_08_PEP_Benchmarking/EmpkinS_Dataset


In [5]:
result_path = root_path.joinpath("results/empkins_dataset_both_algorithms")
result_path.mkdir(exist_ok=True, parents=True)
result_path.resolve()

PosixPath('/Users/richer/Documents/PhD/Projects/EmpkinS/Code/pepbench/experiments/pep_algorithm_benchmarking/results/empkins_dataset_both_algorithms')

In [6]:
dataset_empkins = EmpkinsDataset(empkins_base_path, use_cache=True, only_labeled=True)
dataset_empkins

Unnamed: 0,participant,condition,phase
0,VP_001,tsst,Prep
1,VP_001,tsst,Pause_1
2,VP_001,tsst,Talk
3,VP_001,tsst,Math
4,VP_001,tsst,Pause_5
...,...,...,...
145,VP_032,ftsst,Prep
146,VP_032,ftsst,Pause_1
147,VP_032,ftsst,Talk
148,VP_032,ftsst,Math


## Initialize Algorithms

### Heartbeat Segmentation

In [7]:
heartbeat_segmentation_algo = HeartbeatSegmentationNeurokit()

### ECG - Q-Wave Onset

In [8]:
q_wave_algo_dict = {
    "martinez2004": QPeakExtractionMartinez2004Neurokit(),
    "forounzafar2018": QPeakExtractionForounzafar2018(),
    "scipy-findpeaks": QPeakExtractionSciPyFindPeaksNeurokit(),
}
q_wave_algo_dict.update(
    **{f"vanlien2013-{i}-ms": QPeakExtractionVanLien2013(time_interval_ms=i) for i in np.arange(32, 44, 2)}
)
q_wave_algos = list(q_wave_algo_dict.items())

print("Available Q-wave Onset algorithms:")
pprint(q_wave_algo_dict)

Available Q-wave Onset algorithms:
{'forounzafar2018': QPeakExtractionForounzafar2018(handle_missing_events='warn', scaling_factor=2000),
 'martinez2004': QPeakExtractionMartinez2004Neurokit(handle_missing_events='warn'),
 'scipy-findpeaks': QPeakExtractionSciPyFindPeaksNeurokit(handle_missing_events='warn'),
 'vanlien2013-32-ms': QPeakExtractionVanLien2013(handle_missing_events='warn', time_interval_ms=32),
 'vanlien2013-34-ms': QPeakExtractionVanLien2013(handle_missing_events='warn', time_interval_ms=34),
 'vanlien2013-36-ms': QPeakExtractionVanLien2013(handle_missing_events='warn', time_interval_ms=36),
 'vanlien2013-38-ms': QPeakExtractionVanLien2013(handle_missing_events='warn', time_interval_ms=38),
 'vanlien2013-40-ms': QPeakExtractionVanLien2013(handle_missing_events='warn', time_interval_ms=40),
 'vanlien2013-42-ms': QPeakExtractionVanLien2013(handle_missing_events='warn', time_interval_ms=42)}


### ICG - B-Point

In [9]:
b_point_algo_dict = {
    "stern1985": BPointExtractionStern1985(),
    "sherwood1990": BPointExtractionSherwood1990(),
    "debski1993-second-derivative": BPointExtractionDebski1993SecondDerivative(),
    "lozano2007-linear-regression": BPointExtractionLozano2007LinearRegression(),
    "lozano2007-quadratic-regression": BPointExtractionLozano2007QuadraticRegression(),
    "arbol2017-isoelectric-crossings": BPointExtractionArbol2017IsoelectricCrossings(),
    "arbol2017-second-derivative": BPointExtractionArbol2017SecondDerivative(),
    "arbol2017-third-derivative": BPointExtractionArbol2017ThirdDerivative(),
    "forounzafar2018": BPointExtractionForouzanfar2018(),
    "drost2022": BPointExtractionDrost2022(),
}
b_point_algos = list(b_point_algo_dict.items())

print("Available B-point algorithms:")
pprint(b_point_algo_dict)

Available B-point algorithms:
{'arbol2017-isoelectric-crossings': BPointExtractionArbol2017IsoelectricCrossings(handle_missing_events='warn'),
 'arbol2017-second-derivative': BPointExtractionArbol2017SecondDerivative(handle_missing_events='warn', search_window_start_ms=150, window_size_ms=50),
 'arbol2017-third-derivative': BPointExtractionArbol2017ThirdDerivative(handle_missing_events='warn', search_window_start_ms=300),
 'debski1993-second-derivative': BPointExtractionDebski1993SecondDerivative(correct_outliers=False, handle_missing_events='warn'),
 'drost2022': BPointExtractionDrost2022(correct_outliers=False, handle_missing_events='warn'),
 'forounzafar2018': BPointExtractionForouzanfar2018(correct_outliers=False, handle_missing_events='warn', scaling_factor=2000),
 'lozano2007-linear-regression': BPointExtractionLozano2007LinearRegression(handle_missing_events='warn', moving_average_window=1),
 'lozano2007-quadratic-regression': BPointExtractionLozano2007QuadraticRegression(handle

### Outlier Correction

In [10]:
outlier_correction_dict = {
    "none": OutlierCorrectionDummy(),
    "linear-interpolation": OutlierCorrectionLinearInterpolation(),
    "forouzanfar2018": OutlierCorrectionForouzanfar2018(),
}
outlier_correction_algos = list(outlier_correction_dict.items())

print("Available Outlier Correction algorithms:")
pprint(outlier_correction_dict)

Available Outlier Correction algorithms:
{'forouzanfar2018': OutlierCorrectionForouzanfar2018(),
 'linear-interpolation': OutlierCorrectionLinearInterpolation(),
 'none': OutlierCorrectionDummy()}


In [11]:
algo_combinations = list(product(q_wave_algos, b_point_algos, outlier_correction_algos))

In [12]:
big_result_dict = {}

for algo_combi in tqdm(algo_combinations):
    algo_combi_names = [key[0] for key in algo_combi]

    filename_stub = "pep_results_empkins_" + "_".join(algo_combi_names)

    # check if exists
    # if len(list(result_path.glob(f"{filename_stub}*"))) != 0:
    #    display(Markdown(f"Algrotihm combination {algo_combi_names} already challenged. Skipping..."))
    #    continue

    display(Markdown(f"Running algorithm combination {algo_combi_names}"))
    pep_challenge = PepEvaluationChallenge(dataset=dataset_empkins, validate_kwargs={"n_jobs": -1})

    pipeline = PepExtractionPipeline(
        heartbeat_segmentation_algo=heartbeat_segmentation_algo,
        q_wave_algo=algo_combi[0][1],
        b_point_algo=algo_combi[1][1],
        outlier_correction_algo=algo_combi[2][1],
        handle_negative_pep="nan",
        handle_missing_events="ignore",
    )
    pep_challenge.run(pipeline)
    pep_challenge.results_as_df()
    break
    pep_challenge.save_results(result_path, filename_stub)

    big_result_dict[tuple(algo_combi_names)] = pep_challenge

  0%|          | 0/270 [00:00<?, ?it/s]

Running algorithm combination ['martinez2004', 'stern1985', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

In [13]:
pprint(list(pep_challenge.results_.keys()))

['debug__score_time',
 'data_labels',
 'single__pep_reference_ms',
 'single__pep_estimated_ms',
 'single__error_ms',
 'single__absolute_error_ms',
 'single__absolute_relative_error_percent',
 'single__num_pep_total',
 'single__num_pep_valid',
 'single__num_pep_invalid',
 'single__pep_estimation_per_sample',
 'single__error_per_sample_ms',
 'single__absolute_error_per_sample_ms',
 'single__absolute_relative_error_per_sample_percent',
 'agg__pep_reference_ms__mean',
 'agg__pep_reference_ms__std',
 'agg__pep_estimated_ms__mean',
 'agg__pep_estimated_ms__std',
 'agg__error_ms__mean',
 'agg__error_ms__std',
 'agg__absolute_error_ms__mean',
 'agg__absolute_error_ms__std',
 'agg__absolute_relative_error_percent__mean',
 'agg__absolute_relative_error_percent__std',
 'agg__num_pep_total__mean',
 'agg__num_pep_total__std',
 'agg__num_pep_valid__mean',
 'agg__num_pep_valid__std',
 'agg__num_pep_invalid__mean',
 'agg__num_pep_invalid__std',
 'agg__error_per_sample_ms__mean',
 'agg__error_per_sampl

In [18]:
pprint(list(pep_challenge.results_.keys()))

['debug__score_time',
 'data_labels',
 'single__pep_reference_ms',
 'single__pep_estimated_ms',
 'single__error_ms',
 'single__absolute_error_ms',
 'single__absolute_relative_error_percent',
 'single__num_pep_total',
 'single__num_pep_valid',
 'single__num_pep_invalid',
 'single__pep_estimation_per_sample',
 'single__error_per_sample_ms',
 'single__absolute_error_per_sample_ms',
 'single__absolute_relative_error_per_sample_percent',
 'agg__pep_reference_ms__mean',
 'agg__pep_reference_ms__std',
 'agg__pep_estimated_ms__mean',
 'agg__pep_estimated_ms__std',
 'agg__error_ms__mean',
 'agg__error_ms__std',
 'agg__absolute_error_ms__mean',
 'agg__absolute_error_ms__std',
 'agg__absolute_relative_error_percent__mean',
 'agg__absolute_relative_error_percent__std',
 'agg__num_pep_total',
 'agg__num_pep_valid',
 'agg__num_pep_invalid',
 'agg__error_per_sample_ms__mean',
 'agg__error_per_sample_ms__std',
 'agg__absolute_error_per_sample_ms__mean',
 'agg__absolute_error_per_sample_ms__std',
 'agg