# PEP Benchmarking - EmpkinS Dataset Reference B-Point

This notebook runs the PEP (pre-ejection period) benchmarking challenge on the EmpkinS Dataset. The evaluated PEP extraction pipelines combine automated Q-wave onset extraction algorithms with reference labels for the B-points.

## Setup and Helper Functions

In [1]:
import json
from itertools import product
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from fau_colors import cmaps, register_fausans_font
from IPython.display import Markdown
from tqdm.auto import tqdm

from pepbench.algorithms.ecg import (
    QPeakExtractionForounzafar2018,
    QPeakExtractionMartinez2004Neurokit,
    QPeakExtractionSciPyFindPeaksNeurokit,
    QPeakExtractionVanLien2013,
)
from pepbench.algorithms.heartbeat_segmentation import HeartbeatSegmentationNeurokit
from pepbench.algorithms.outlier_correction import (
    OutlierCorrectionDummy,
)
from pepbench.datasets import EmpkinsDataset
from pepbench.evaluation import PepEvaluationChallenge
from pepbench.pipelines import PepExtractionPipelineReferenceBPoints

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [3]:
# Close any figures that are still open.
plt.close("all")

# NOTE(review): the FAU Sans font setup is disabled (here and in the rcParams below);
# presumably the font is not installed on this machine -- TODO confirm.
# register_fausans_font()

palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Matplotlib overrides, applied after the seaborn theme has been set.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,  # Type 42 (TrueType) fonts in PDF output
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        # "font.sans-serif": "FAUSans Office",
    }
)

# Last expression of the cell: render the palette as cell output.
palette

In [4]:
# Two directory levels above the notebook's working directory.
root_path = Path("..", "..")

In [5]:
# Top-level key of config.json that selects which set of paths to use.
deploy_type = "local"

# Read the configuration inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
with root_path.joinpath("config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Base folder of the EmpkinS dataset as configured for the selected deploy type.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
print(empkins_base_path)

\Users\sebas\Development\ResearchInternship\Data\2024_08_PEP_Benchmarking\EmpkinS_Dataset


In [6]:
# Folder that receives the result files written by the benchmark loop.
result_path = root_path / "results" / "empkins_dataset_reference_b_point"
# Create the folder including missing parents; an existing folder is left untouched.
result_path.mkdir(parents=True, exist_ok=True)
# Last expression of the cell: show the absolute location.
result_path.resolve()

WindowsPath('C:/Users/sebas/Development/ResearchInternship/Code/pepbench/experiments/pep_algorithm_benchmarking/results/empkins_dataset_reference_b_point')

In [7]:
# Dataset object that every challenge in this notebook is evaluated on.
# NOTE(review): `only_labeled=True` presumably restricts the index to recordings with
# reference labels and `use_cache=True` presumably enables cached loading -- confirm
# against the EmpkinsDataset documentation.
dataset_empkins = EmpkinsDataset(
    empkins_base_path,
    use_cache=True,
    only_labeled=True,
)
# Last expression of the cell: display the dataset.
dataset_empkins

Unnamed: 0,participant,condition,phase
0,VP_001,tsst,Prep
1,VP_001,tsst,Pause_1
2,VP_001,tsst,Talk
3,VP_001,tsst,Math
4,VP_001,tsst,Pause_5
...,...,...,...
145,VP_032,ftsst,Prep
146,VP_032,ftsst,Pause_1
147,VP_032,ftsst,Talk
148,VP_032,ftsst,Math


## Initialize Algorithms

### Heartbeat Segmentation

In [8]:
# Heartbeat segmentation algorithm instance; the same object is passed to every
# pipeline that the benchmark loop builds.
heartbeat_segmentation_algo = HeartbeatSegmentationNeurokit()

### ECG - Q-Wave Onset

In [9]:
# Registry of Q-wave onset algorithms, keyed by the short name that later becomes
# part of the result file name.
q_wave_algo_dict = {
    "martinez2004": QPeakExtractionMartinez2004Neurokit(),
    "forounzafar2018": QPeakExtractionForounzafar2018(),
    "scipy-findpeaks": QPeakExtractionSciPyFindPeaksNeurokit(),
    # One Van Lien (2013) variant per interval: 32, 34, ..., 42 ms (the stop value 44 is excluded).
    **{
        f"vanlien2013-{interval_ms}-ms": QPeakExtractionVanLien2013(time_interval_ms=interval_ms)
        for interval_ms in np.arange(32, 44, 2)
    },
}
# (name, algorithm) pairs in registry order, used to build the algorithm combinations.
q_wave_algos = [*q_wave_algo_dict.items()]

print("Available Q-wave Onset algorithms:")
pprint(q_wave_algo_dict)

Available Q-wave Onset algorithms:
{'dwt-neurokit': QPeakExtractionNeurokitDwt(handle_missing_events='warn'),
 'r-peak-diff-32-ms': QWaveOnsetExtractionVanLien2013(time_interval_ms=32),
 'r-peak-diff-34-ms': QWaveOnsetExtractionVanLien2013(time_interval_ms=34),
 'r-peak-diff-36-ms': QWaveOnsetExtractionVanLien2013(time_interval_ms=36),
 'r-peak-diff-38-ms': QWaveOnsetExtractionVanLien2013(time_interval_ms=38),
 'r-peak-diff-40-ms': QWaveOnsetExtractionVanLien2013(time_interval_ms=40),
 'r-peak-diff-42-ms': QWaveOnsetExtractionVanLien2013(time_interval_ms=42)}


### ICG - B-Point

In [10]:
# Single (name, algorithm) entry. The algorithm slot is None -- presumably because the
# pipeline takes the B-points from the reference labels instead of running a
# detection algorithm (TODO confirm against PepExtractionPipelineReferenceBPoints).
b_point_algos = [("b-point-reference", None)]

### Outlier Correction

In [11]:
# (name, algorithm) pairs of the outlier correction step; only the dummy
# implementation is registered, under the name "none".
outlier_correction_algos = [("none", OutlierCorrectionDummy())]
# Name -> algorithm lookup built from the same pairs (shares the same instance).
outlier_correction_dict = dict(outlier_correction_algos)

print("Available Outlier Correction algorithms:")
pprint(outlier_correction_dict)

Available Outlier Correction algorithms:
{'none': OutlierCorrectionDummy()}


In [12]:
# Cartesian product of all registered algorithms. Each element is a 3-tuple of
# (name, algorithm) pairs ordered as (Q-wave, B-point, outlier correction); the
# right-most loop varies fastest.
algo_combinations = [
    (q_wave_entry, b_point_entry, outlier_correction_entry)
    for q_wave_entry in q_wave_algos
    for b_point_entry in b_point_algos
    for outlier_correction_entry in outlier_correction_algos
]

In [13]:
# Challenge objects of the combinations evaluated in THIS run, keyed by the tuple of
# algorithm names. Combinations that are skipped because result files already exist
# are not added to this dict.
big_result_dict = {}

for algo_combi in tqdm(algo_combinations):
    # Every element of `algo_combi` is a (name, algorithm) pair, ordered as
    # (Q-wave, B-point, outlier correction).
    (q_wave_name, q_wave_algo), (b_point_name, b_point_algo), (outlier_name, outlier_algo) = algo_combi
    algo_combi_names = [q_wave_name, b_point_name, outlier_name]

    filename_stub = "pep_results_empkins_" + "_".join(algo_combi_names)

    # Skip the combination if any file starting with the stub is already present in
    # the result folder. `any` stops at the first match instead of listing all files.
    if any(result_path.glob(f"{filename_stub}*")):
        display(Markdown(f"Algorithm combination {algo_combi_names} already challenged. Skipping..."))
        continue

    display(Markdown(f"Running algorithm combination {algo_combi_names}"))
    # NOTE(review): n_jobs=-1 presumably requests all available CPU cores -- confirm.
    pep_challenge = PepEvaluationChallenge(dataset=dataset_empkins, validate_kwargs={"n_jobs": -1})

    pipeline = PepExtractionPipelineReferenceBPoints(
        heartbeat_segmentation_algo=heartbeat_segmentation_algo,
        q_wave_algo=q_wave_algo,
        b_point_algo=b_point_algo,
        outlier_correction_algo=outlier_algo,
        handle_negative_pep="nan",
        handle_missing_events="ignore",
    )
    pep_challenge.run(pipeline)
    # NOTE(review): the return value is discarded; presumably called for its side
    # effect of preparing the results before they are saved -- confirm.
    pep_challenge.results_as_df()
    pep_challenge.save_results(result_path, filename_stub)

    big_result_dict[tuple(algo_combi_names)] = pep_challenge

  0%|          | 0/7 [00:00<?, ?it/s]

Algrotihm combination ['dwt-neurokit', 'b-point-reference', 'none'] already challenged. Skipping...

Algrotihm combination ['r-peak-diff-32-ms', 'b-point-reference', 'none'] already challenged. Skipping...

Algrotihm combination ['r-peak-diff-34-ms', 'b-point-reference', 'none'] already challenged. Skipping...

Algrotihm combination ['r-peak-diff-36-ms', 'b-point-reference', 'none'] already challenged. Skipping...

Algrotihm combination ['r-peak-diff-38-ms', 'b-point-reference', 'none'] already challenged. Skipping...

Algrotihm combination ['r-peak-diff-40-ms', 'b-point-reference', 'none'] already challenged. Skipping...

Algrotihm combination ['r-peak-diff-42-ms', 'b-point-reference', 'none'] already challenged. Skipping...