# PEP Benchmarking B-Point - EmpkinS Dataset

This notebook runs the PEP benchmarking challenge on the EmpkinS Dataset. The PEP extraction pipelines use reference labels for the Q-peaks and automated algorithms for B-point extraction; each B-point algorithm is evaluated in combination with each outlier correction algorithm.

## Setup and Helper Functions

In [1]:
import json
from itertools import product
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import seaborn as sns
from fau_colors import cmaps, register_fausans_font
from IPython.display import Markdown
from tqdm.auto import tqdm

from pepbench.algorithms.heartbeat_segmentation import HeartbeatSegmentationNeurokit
from pepbench.algorithms.icg import (
    BPointExtractionArbol2017IsoelectricCrossings,
    BPointExtractionArbol2017SecondDerivative,
    BPointExtractionArbol2017ThirdDerivative,
    BPointExtractionDebski1993SecondDerivative,
    BPointExtractionDrost2022,
    BPointExtractionForouzanfar2018,
    BPointExtractionLozano2007LinearRegression,
    BPointExtractionLozano2007QuadraticRegression,
    BPointExtractionSherwood1990,
    BPointExtractionStern1985,
)
from pepbench.algorithms.outlier_correction import (
    OutlierCorrectionDummy,
    OutlierCorrectionForouzanfar2018,
    OutlierCorrectionLinearInterpolation,
)
from pepbench.datasets import EmpkinsDataset
from pepbench.evaluation import PepEvaluationChallenge
from pepbench.pipelines import PepExtractionPipelineReferenceQPeak

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
# Start from a clean slate: register the FAU Sans font and close figures left open by earlier runs.
register_fausans_font()
plt.close("all")

# Seaborn theme that uses the `cmaps.faculties` colors as the default palette.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Matplotlib overrides, applied after `sns.set_theme` (same order as before) so they take precedence.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        "font.sans-serif": "FAUSans Office",
    }
)

# Last expression: render the palette as the cell output.
palette

In [3]:
# Base directory two levels above the notebook's working directory; `config.json` and `results/` live here.
root_path = Path("..", "..")

In [4]:
# Top-level key of `config.json` that selects which set of paths to use.
deploy_type = "local"

# Read the config inside a context manager so the file handle is closed deterministically
# (the previous `json.load(path.open(...))` left the handle open until garbage collection).
with root_path.joinpath("config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Location of the EmpkinS dataset for the selected deployment.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
print(empkins_base_path)

/Users/richer/Documents/PhD/Projects/EmpkinS/HealthPsychology_D03/Data/2024_08_PEP_Benchmarking/EmpkinS_Dataset


In [5]:
# Output directory for this benchmark; created together with missing parents, no error if it already exists.
result_path = root_path / "results/empkins_dataset_b_point"
result_path.mkdir(parents=True, exist_ok=True)

# Last expression: show the absolute location as the cell output.
result_path.resolve()

PosixPath('/Users/richer/Documents/PhD/Code_Repositories/EmpkinS/pepbench/experiments/pep_algorithm_benchmarking/results/empkins_dataset_b_point')

In [6]:
# Dataset object that the evaluation challenge iterates over.
# NOTE(review): `only_labeled=True` presumably restricts the index to recordings with reference labels,
# and `use_cache=True` presumably enables caching of loaded data - confirm against `EmpkinsDataset`.
dataset_empkins = EmpkinsDataset(
    empkins_base_path,
    use_cache=True,
    only_labeled=True,
)

# Last expression: display the dataset index.
dataset_empkins

Unnamed: 0,participant,condition,phase
0,VP_001,tsst,Prep
1,VP_001,tsst,Pause_1
2,VP_001,tsst,Talk
3,VP_001,tsst,Math
4,VP_001,tsst,Pause_5
...,...,...,...
145,VP_032,ftsst,Prep
146,VP_032,ftsst,Pause_1
147,VP_032,ftsst,Talk
148,VP_032,ftsst,Math


## Initialize Algorithms

### Heartbeat Segmentation

In [7]:
# Single heartbeat segmentation instance (default parameters) that is passed to every pipeline
# built in the benchmark loop below.
heartbeat_segmentation_algo = HeartbeatSegmentationNeurokit()

### ECG - Q-Peak

In [8]:
# Q-peaks come from the reference labels, so there is no algorithm instance to configure (hence `None`).
q_peak_algo_dict = {"q-peak-reference": None}

# (name, algorithm) pairs, in the form used to build the algorithm combinations.
q_peak_algos = [(name, algo) for name, algo in q_peak_algo_dict.items()]

print("Available Q-Peak algorithms:")
pprint(q_peak_algo_dict)

Available Q-Peak algorithms:
{'q-peak-reference': None}


### ICG - B-Point

In [9]:
# (name, class) pairs for all B-point extraction algorithms under test, ordered by the publication
# year encoded in the name. Every algorithm is instantiated with its default parameters.
b_point_algo_classes = (
    ("stern1985", BPointExtractionStern1985),
    ("sherwood1990", BPointExtractionSherwood1990),
    ("debski1993-second-derivative", BPointExtractionDebski1993SecondDerivative),
    ("lozano2007-linear-regression", BPointExtractionLozano2007LinearRegression),
    ("lozano2007-quadratic-regression", BPointExtractionLozano2007QuadraticRegression),
    ("arbol2017-isoelectric-crossings", BPointExtractionArbol2017IsoelectricCrossings),
    ("arbol2017-second-derivative", BPointExtractionArbol2017SecondDerivative),
    ("arbol2017-third-derivative", BPointExtractionArbol2017ThirdDerivative),
    ("forouzanfar2018", BPointExtractionForouzanfar2018),
    ("drost2022", BPointExtractionDrost2022),
)

# Name -> algorithm instance; insertion order follows the listing above.
b_point_algo_dict = {name: algo_cls() for name, algo_cls in b_point_algo_classes}

# (name, algorithm) pairs, in the form used to build the algorithm combinations.
b_point_algos = [*b_point_algo_dict.items()]

print("Available B-point algorithms:")
pprint(b_point_algo_dict)

Available B-point algorithms:
{'arbol2017-isoelectric-crossings': BPointExtractionArbol2017IsoelectricCrossings(handle_missing_events='warn'),
 'arbol2017-second-derivative': BPointExtractionArbol2017SecondDerivative(handle_missing_events='warn', search_window_start_ms=150, window_size_ms=50),
 'arbol2017-third-derivative': BPointExtractionArbol2017ThirdDerivative(handle_missing_events='warn', search_window_start_ms=300),
 'debski1993-second-derivative': BPointExtractionDebski1993SecondDerivative(correct_outliers=False, handle_missing_events='warn'),
 'drost2022': BPointExtractionDrost2022(correct_outliers=False, handle_missing_events='warn'),
 'forouzanfar2018': BPointExtractionForouzanfar2018(correct_outliers=False, handle_missing_events='warn', scaling_factor=2000),
 'lozano2007-linear-regression': BPointExtractionLozano2007LinearRegression(handle_missing_events='warn', moving_average_window=1),
 'lozano2007-quadratic-regression': BPointExtractionLozano2007QuadraticRegression(handle

### Outlier Correction

In [10]:
# (name, class) pairs for the outlier correction stage; "none" maps to the dummy implementation.
outlier_correction_classes = (
    ("none", OutlierCorrectionDummy),
    ("linear-interpolation", OutlierCorrectionLinearInterpolation),
    ("forouzanfar2018", OutlierCorrectionForouzanfar2018),
)

# Name -> algorithm instance (default parameters); insertion order follows the listing above.
outlier_correction_dict = {name: algo_cls() for name, algo_cls in outlier_correction_classes}

# (name, algorithm) pairs, in the form used to build the algorithm combinations.
outlier_correction_algos = [*outlier_correction_dict.items()]

print("Available Outlier Correction algorithms:")
pprint(outlier_correction_dict)

Available Outlier Correction algorithms:
{'forouzanfar2018': OutlierCorrectionForouzanfar2018(),
 'linear-interpolation': OutlierCorrectionLinearInterpolation(),
 'none': OutlierCorrectionDummy()}


In [11]:
# Cartesian product of all stages: each element is a 3-tuple of (name, algorithm) pairs in the order
# (Q-peak, B-point, outlier correction). With the algorithms above this yields 1 x 10 x 3 = 30 combinations.
algo_combinations = [
    *product(q_peak_algos, b_point_algos, outlier_correction_algos),
]

In [12]:
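# Optional, for debugging: uncomment to select a single participant/condition/phase
# (presumably to try a pipeline on one recording instead of the full dataset).
# subset = dataset_empkins.get_subset(participant="VP_001", condition="tsst", phase="Prep")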

In [13]:
# Challenge objects of all combinations evaluated in THIS run, keyed by
# (q_peak_name, b_point_name, outlier_correction_name).
# Combinations skipped because results already exist on disk are NOT added here.
big_result_dict = {}

for algo_combi in tqdm(algo_combinations):
    # Each combination is a 3-tuple of (name, algorithm) pairs: Q-peak, B-point, outlier correction.
    (q_peak_name, q_peak_algo), (b_point_name, b_point_algo), (outlier_name, outlier_algo) = algo_combi
    algo_combi_names = [q_peak_name, b_point_name, outlier_name]

    filename_stub = "pep_results_empkins_" + "_".join(algo_combi_names)

    # Resume support: any file in the result folder starting with this stub marks the combination as done.
    already_challenged = any(result_path.glob(f"{filename_stub}*"))
    if already_challenged:
        display(Markdown(f"Algorithm combination {algo_combi_names} already challenged. Skipping..."))
        continue

    display(Markdown(f"Running algorithm combination {algo_combi_names}"))

    # A fresh challenge per combination so that results are not shared between runs.
    # NOTE(review): `n_jobs=-1` presumably requests all available cores - confirm.
    pep_challenge = PepEvaluationChallenge(dataset=dataset_empkins, validate_kwargs={"n_jobs": -1})

    pipeline = PepExtractionPipelineReferenceQPeak(
        heartbeat_segmentation_algo=heartbeat_segmentation_algo,
        q_peak_algo=q_peak_algo,
        b_point_algo=b_point_algo,
        outlier_correction_algo=outlier_algo,
        handle_negative_pep="nan",
        handle_missing_events="ignore",
    )

    # Evaluate the pipeline, then call `results_as_df()` before saving (call order kept as in the original).
    pep_challenge.run(pipeline)
    pep_challenge.results_as_df()
    pep_challenge.save_results(result_path, filename_stub)

    big_result_dict[tuple(algo_combi_names)] = pep_challenge

  0%|          | 0/30 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'stern1985', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'stern1985', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'stern1985', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'sherwood1990', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'sherwood1990', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'sherwood1990', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'debski1993-second-derivative', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'debski1993-second-derivative', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'debski1993-second-derivative', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'lozano2007-linear-regression', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'lozano2007-linear-regression', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'lozano2007-linear-regression', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'lozano2007-quadratic-regression', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'lozano2007-quadratic-regression', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'lozano2007-quadratic-regression', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-isoelectric-crossings', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-isoelectric-crossings', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-isoelectric-crossings', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-second-derivative', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-second-derivative', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-second-derivative', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-third-derivative', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-third-derivative', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'arbol2017-third-derivative', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'forouzanfar2018', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'forouzanfar2018', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'forouzanfar2018', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'drost2022', 'none']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'drost2022', 'linear-interpolation']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]

Running algorithm combination ['q-peak-reference', 'drost2022', 'forouzanfar2018']

Datapoints:   0%|          | 0/150 [00:00<?, ?it/s]