# Analysis PEP Algorithms

## Setup and Helper Functions

### Imports

In [None]:
import json
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from biopsykit.utils.dataframe_handling import multi_xs
from fau_colors import cmaps, register_fausans_font

from pepbench.data_handling import (
    add_unique_id_to_results_dataframe,
    compute_improvement_pipeline,
    compute_pep_performance_metrics,
    get_error_by_group,
)
from pepbench.datasets import EmpkinsDataset, GuardianDataset
from pepbench.export import convert_to_latex, create_algorithm_result_table
from pepbench.io import load_challenge_results_from_folder
from pepbench.plotting.results import (
    boxplot_algorithm_performance,
    paired_plot_error_pep_pipeline,
    regplot_error_heart_rate,
    residual_plot_pep,
    residual_plot_pep_heart_rate,
    residual_plot_pep_participant,
    residual_plot_pep_phase,
    violinplot_algorithm_performance,
)
from pepbench.utils import rename_algorithms, rename_metrics, styling

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Global plotting setup: register the FAU Sans font, close any open figures, and apply the seaborn theme.
register_fausans_font()
plt.close("all")

palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Matplotlib defaults shared by every figure in this notebook.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        "font.sans-serif": "FAUSans Office",
    }
)

# Last expression of the cell: show the palette as cell output.
palette

In [None]:
# Base directory (two levels above the working directory) from which config, results and exports are resolved.
root_path = Path("../../")

In [None]:
# Key of the ``config.json`` entry that holds the dataset locations for this machine.
deploy_type = "local"

# ``read_text`` opens and closes the file in one call, so no file handle is left open
# (previously the handle returned by ``open()`` was never closed).
config_dict = json.loads(root_path.joinpath("config.json").read_text(encoding="utf-8"))

# Base folders of the two datasets as configured for the selected deploy type.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(empkins_base_path)

### Input Paths

In [None]:
# Folder below ``root_path`` from which the challenge results are loaded.
result_path = root_path.joinpath("results")

### Output Paths

In [None]:
# Location of the paper folder, read from ``paper_path.json``. ``read_text`` closes the file immediately
# (previously the handle returned by ``open()`` was never closed).
paper_path = json.loads(root_path.joinpath("paper_path.json").read_text(encoding="utf-8"))["paper_path"]
paper_path = Path(paper_path)

# Local export folders below ``root_path``.
export_path = root_path.joinpath("exports")
img_path = export_path.joinpath("plots")
stats_path = export_path.joinpath("stats")

# Export folders inside the paper folder (main document and supplementary material).
img_path_paper = paper_path.joinpath("img")
tab_path_paper = paper_path.joinpath("tab")
suppl_img_path_paper = paper_path.joinpath("supplementary_material/img")
suppl_tab_path_paper = paper_path.joinpath("supplementary_material/tab")

# Hand all folders used in this notebook to biopsykit's ``mkdirs`` helper.
bp.utils.file_handling.mkdirs(
    [
        result_path,
        export_path,
        img_path,
        stats_path,
        img_path_paper,
        tab_path_paper,
        suppl_img_path_paper,
        suppl_tab_path_paper,
    ]
)

In [None]:
algo_levels = ["q_peak_algorithm", "b_point_algorithm", "outlier_correction_algorithm"]
algo_level_mapping = dict(
    zip(algo_levels, ["Q-Peak Algorithm", "B-Point Algorithm", "Outlier Correction"], strict=False)
)

In [None]:
# NOTE(review): this cell repeats the configuration loading done earlier in the notebook.
# Key of the ``config.json`` entry that holds the dataset locations for this machine.
deploy_type = "local"

# ``read_text`` opens and closes the file in one call, so no file handle is left open
# (previously the handle returned by ``open()`` was never closed).
config_dict = json.loads(root_path.joinpath("config.json").read_text(encoding="utf-8"))

# Base folders of the two datasets as configured for the selected deploy type.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(empkins_base_path)

## EmpkinS Dataset

In [None]:
# Create the EmpkinS dataset from the configured base path with ``use_cache`` and ``only_labeled`` enabled.
dataset_empkins = EmpkinsDataset(empkins_base_path, use_cache=True, only_labeled=True)
# Last expression of the cell: show the dataset as cell output.
dataset_empkins

In [None]:
# Load the challenge results of the combined (Q-peak + B-point) pipelines on the EmpkinS dataset.
# The per-sample results are indexed by participant, condition and phase.
results_empkins = load_challenge_results_from_folder(
    result_path.joinpath("empkins_dataset_both_algorithms"),
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# Keep the per-sample results and the total aggregates under separate names for the cells below.
results_per_sample_empkins = results_empkins.per_sample
results_agg_total_empkins = results_empkins.agg_total
# Last expression of the cell: preview the first rows.
results_per_sample_empkins.head()

### Results Table

In [None]:
# Compute the PEP performance metrics per pipeline; the aggregated results are passed as ``num_heartbeats``.
metrics_empkins = compute_pep_performance_metrics(results_per_sample_empkins, num_heartbeats=results_agg_total_empkins)
# Preview the first ten rows and highlight the column-wise minimum of both error metrics in green.
metrics_empkins.head(n=10).style.highlight_min(
    subset=["Mean Absolute Error [ms]", "Mean Absolute Relative Error [%]"], props="background-color: LightGreen;"
)
# metrics_empkins.head(n=20)

In [None]:
# Build the result table for the EmpkinS pipelines and convert it to a LaTeX ``longtable``.
result_table = create_algorithm_result_table(metrics_empkins)

latex_output = convert_to_latex(
    result_table,
    collapse_index_columns=False,
    column_header_bold=True,
    environment="longtable",
    column_format="p{2.25cm}p{2.0cm}p{2.0cm}S[table-format=1.1(2)]S[table-format=1.1(2)]S[table-format=1.1(2)]p{2.0cm}",
    caption=r"Results of combined \ac{PEP} extraction pipelines on the \textit{EmpkinS Dataset}. The pipelines are sorted by the MAE in ascending order.",
    label="tab:b_point_results_empkins",
)

# fix pandas bug that does not format the last column name in bold
# latex_output = latex_output.replace(r"{Invalid", r"{\bfseries Invalid")
# some manual post-processing
# strip the acronym macro, wrap the table in a ``small`` environment, and tidy spacing / bold markup
latex_output = latex_output.replace(r"\ac", r"")
latex_output = latex_output.replace(r"\begin{longtable}", r"\begin{small}\begin{longtable}")
latex_output = latex_output.replace(r"\end{longtable}", r"\end{longtable}\end{small}")
latex_output = latex_output.replace(r"Van13 ", r"Van13\,")
latex_output = latex_output.replace(r"\font-weightbold ", r"\bfseries ")
# break the index column headers onto two lines
latex_output = latex_output.replace(r"{B-Point Detection}", r"{B-Point\newline Detection}")
latex_output = latex_output.replace(r"{Q-Peak Detection}", r"{Q-Peak\newline Detection}")
latex_output = latex_output.replace(r"{Outlier Correction}", r"{Outlier\newline Correction}")

# ``write_text`` closes the file after writing (the previous ``open(...).write(...)`` left the handle open)
# and the explicit encoding keeps the output independent of the platform default.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_results_empkins.tex").write_text(latex_output, encoding="utf-8")
print(latex_output)

### Plots

In [None]:
# Pipelines shown in the EmpkinS plots, each given as
# (q_peak_algorithm, b_point_algorithm, outlier_correction_algorithm).
selected_pipelines_for_plotting_empkins = [
    ("forouzanfar2018", "drost2022", "none"),
    ("vanlien2013-32-ms", "drost2022", "none"),
    ("forouzanfar2018", "arbol2017-third-derivative", "none"),
    ("forouzanfar2018", "lozano2007-linear-regression", "none"),
]

In [None]:
# Restrict the per-sample results to the selected pipelines (matched on the three algorithm levels).
results_empkins_plot = multi_xs(
    data=results_per_sample_empkins, keys=selected_pipelines_for_plotting_empkins, level=algo_levels
)
# results_empkins_plot = results_empkins_plot.droplevel("outlier_correction_algorithm")
# results_empkins_plot = results_empkins_plot.reindex([s[0] for s in selected_algos_for_plotting_empkins], level=0)
# Last expression of the cell: preview the first rows.
results_empkins_plot.head()

#### Absolute Error

In [None]:
# Box plot of the per-sample absolute error for the selected EmpkinS pipelines (means shown).
fig, ax = boxplot_algorithm_performance(
    results_empkins_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    figsize=(6, 5),
)

# Saved to the local plot folder only.
fig.savefig(img_path.joinpath("img_boxplot_pep_pipelines_mae_empkins.pdf"), transparent=True)

In [None]:
# Violin plot of the per-sample absolute error for the selected EmpkinS pipelines (not saved to disk).
fig, ax = violinplot_algorithm_performance(
    results_empkins_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    figsize=(6, 5),
)

#### Absolute Error (with and without Outlier)

In [None]:
# Two box plots of the absolute error side by side: left with fliers, right without.
fig, axs = plt.subplots(ncols=2, figsize=(10, 3))

boxplot_algorithm_performance(
    results_empkins_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    showfliers=True,
    width=0.9,
    title="PEP Pipeline Results – With Outlier",
    fig=fig,
    ax=axs[0],
)
boxplot_algorithm_performance(
    results_empkins_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    showfliers=False,
    width=0.9,
    title="PEP Pipeline Results – Without Outlier",
    fig=fig,
    ax=axs[1],
)
# Remove the x-axis labels from both subplots.
for ax in axs:
    ax.set_xlabel(None)
fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_boxplot_pep_pipelines_mae_with_without_outlier_empkins.pdf"), transparent=True)

#### Error

In [None]:
# Violin plot of the per-sample error (``error_per_sample_ms``) for the selected EmpkinS pipelines.
fig, ax = violinplot_algorithm_performance(
    results_empkins_plot,
    metric="error_per_sample_ms",
    showmeans=True,
    figsize=(6, 5),
)

#### Error per Participant

In [None]:
# Error grouped by participant, restricted to the selected pipelines (columns) and rounded to two decimals.
error_per_participant_empkins = multi_xs(
    get_error_by_group(results_per_sample_empkins, grouper="participant"),
    selected_pipelines_for_plotting_empkins,
    level=algo_levels,
    axis=1,
).round(2)
# Rename the column labels: algorithm names first, then metric names.
error_per_participant_empkins = error_per_participant_empkins.rename(columns=rename_algorithms)
error_per_participant_empkins = error_per_participant_empkins.rename(columns=rename_metrics)

# Last expression of the cell: show the table with the column-wise maxima highlighted.
error_per_participant_empkins.style.highlight_max(props="background-color: Pink;")

In [None]:
# Convert the per-participant error table to LaTeX. The styler highlights the column-wise maxima and
# LaTeX-escapes the row index labels; one ``S`` column is emitted per data column.
latex_output = convert_to_latex(
    error_per_participant_empkins.style.highlight_max(props="background-color: Pink;").format_index(
        escape="latex", axis=0
    ),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{3.5cm}" + "S[table-format=2.2]" * len(error_per_participant_empkins.columns),
    caption=r"Mean Absolute Error of selected PEP extraction pipelines on the \textit{EmpkinS Dataset} per participant. The values with the highest errors are highlighted in red.",
    label="tab:pep_pipeline_results_per_participant_empkins",
)

# some manual post-processing: smaller font, readable header names, and siunitx rounding to two decimals
latex_output = latex_output.replace(r"\begin{table}[ht]", r"\begin{table}[ht]\small")
latex_output = latex_output.replace(r"q_peak_algorithm", r"\bfseries Q-peak Algorithm")
latex_output = latex_output.replace(r"b_point_algorithm", r"\bfseries B-point Algorithm")
latex_output = latex_output.replace(r"outlier_correction_algorithm", r"\bfseries Outlier Correction Algorithm")
latex_output = latex_output.replace(r"{participant}", r"{Participant}")
latex_output = latex_output.replace(r"{metric}", r"{}")
latex_output = latex_output.replace(r"{\bfseries mean}", r"{Mean}")
latex_output = latex_output.replace(r"{\bfseries std}", r"{SD}")
latex_output = latex_output.replace(r"{std}", r"{SD}")
latex_output = latex_output.replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")

# ``write_text`` closes the file after writing (the previous ``open(...).write(...)`` left the handle open)
# and the explicit encoding keeps the output independent of the platform default.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_results_per_participant_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

#### Residual Plots

In [None]:
# Pipelines shown in the EmpkinS residual plots (one subplot each), given as
# (q_peak_algorithm, b_point_algorithm, outlier_correction_algorithm).
selected_pipelines_for_residual_empkins = [
    ("forouzanfar2018", "drost2022", "none"),
    # ("vanlien2013-32-ms", "drost2022", "none"),
    ("forouzanfar2018", "arbol2017-third-derivative", "none"),
    ("forouzanfar2018", "lozano2007-linear-regression", "none"),
]

In [None]:
# One residual plot per selected pipeline on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for i, ax in enumerate(axs):
    fig, ax = residual_plot_pep(
        results_empkins_plot,
        selected_pipelines_for_residual_empkins[i],
        alpha=0.5,
        show_upper_limit=True,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)
# y-axes are shared, so setting the limits on the first axis applies to all subplots
axs[0].set_ylim([-125, 225])

fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipelines_empkins.pdf"), transparent=True)

In [None]:
# One per-participant residual plot per selected pipeline on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for i, ax in enumerate(axs):
    fig, ax = residual_plot_pep_participant(
        results_empkins_plot,
        selected_pipelines_for_residual_empkins[i],
        alpha=0.5,
        show_upper_limit=True,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)
# y-axes are shared, so setting the limits on the first axis applies to all subplots
axs[0].set_ylim([-125, 150])

fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipelines_per_participant_empkins.pdf"), transparent=True)

In [None]:
# One per-phase residual plot per selected pipeline on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for i, ax in enumerate(axs):
    fig, ax = residual_plot_pep_phase(
        results_empkins_plot,
        selected_pipelines_for_residual_empkins[i],
        alpha=0.5,
        show_upper_limit=True,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)

# y-axes are shared, so setting the limits on the first axis applies to all subplots
axs[0].set_ylim([-125, 150])

# NOTE(review): unlike the other residual-plot cells, no ``fig.tight_layout()`` is called before saving —
# confirm this is intended (e.g. because the plotting helper handles the layout itself).
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipelines_per_phase_empkins.pdf"), transparent=True)

In [None]:
# One heart-rate residual plot per selected pipeline on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for i, ax in enumerate(axs):
    fig, ax = residual_plot_pep_heart_rate(
        results_empkins_plot,
        selected_pipelines_for_residual_empkins[i],
        alpha=0.5,
        show_upper_limit=True,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)

# y-axes are shared, so setting the limits on the first axis applies to all subplots
axs[0].set_ylim([-125, 150])

# NOTE(review): unlike the other residual-plot cells, no ``fig.tight_layout()`` is called before saving —
# confirm this is intended (e.g. because the plotting helper handles the layout itself).
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipelines_heart_rate_empkins.pdf"), transparent=True)

In [None]:
# Regression plot of the absolute error against heart rate, one subplot per selected pipeline.
# Note that the full per-sample results (not the pre-filtered plot subset) are passed here.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for i, ax in enumerate(axs):
    fig, ax = regplot_error_heart_rate(
        results_per_sample_empkins,
        selected_pipelines_for_residual_empkins[i],
        error_metric="absolute_error_per_sample_ms",
        add_corr_coeff=True,
        ax=ax,
    )
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)

fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_pep_pipelines_error_heart_rate_empkins.pdf"), transparent=True)

#### Effect of Outlier Correction on Estimation Error

In [None]:
# Outlier-correction algorithms in the order used for the tables below ...
outlier_algos = ["none", "linear-interpolation", "forouzanfar2018"]
# ... and a list of short labels of the same length (presumably the matching display names).
outlier_algos_rename = ["None", "LinInt", "For18"]

In [None]:
# Within every (Q-peak, B-point) combination, order the rows by outlier-correction algorithm
# as given in ``outlier_algos``.
metrics_empkins_outlier = metrics_empkins.groupby(["q_peak_algorithm", "b_point_algorithm"], group_keys=False).apply(
    lambda df: df.reindex(outlier_algos, level="outlier_correction_algorithm")
)
# Preview the first ten rows with the project's outlier-improvement styling applied to MAE and invalid PEPs.
metrics_empkins_outlier.head(n=10).style.apply(
    styling.highlight_outlier_improvement, subset=["Mean Absolute Error [ms]", "Invalid PEPs"]
)

#### Horizontal Table

In [None]:
# Spread the outlier-correction level of the MAE column over the columns, then reorder and sort the
# column levels so that the outlier-correction algorithms follow the order of ``outlier_algos``.
metric_empkins_unstack = (
    metrics_empkins_outlier[["Mean Absolute Error [ms]"]]
    .unstack("outlier_correction_algorithm")
    .reorder_levels([0, 2, 1], axis=1)
    .sort_index(axis=1)
    .reindex(outlier_algos, level="outlier_correction_algorithm", axis=1)
)
# Print the row-wise minimum of all columns whose key contains "mean" in bold.
metric_empkins_unstack_style = metric_empkins_unstack.style.highlight_min(
    subset=[column for column in metric_empkins_unstack.columns if "mean" in column],
    axis=1,
    props="font-weight: bold;",
)
# Last expression of the cell: show the styled table.
metric_empkins_unstack_style

#### Horizontal Table to LaTeX

In [None]:
# Build the MAE result table and move the outlier-correction algorithms into the columns.
result_table = create_algorithm_result_table(metrics_empkins_outlier[["Mean Absolute Error [ms]"]])
result_table = result_table.unstack("Outlier Correction")  # .reindex_like(metric_empkins_unstack)
result_table = result_table[["Mean Absolute Error [ms]"]]

# Convert to a LaTeX ``longtable``; the project's ``highlight_min_uncertainty`` styling is applied row-wise.
latex_output = convert_to_latex(
    result_table.style.apply(styling.highlight_min_uncertainty, axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    siunitx=False,
    environment="longtable",
    column_format="p{2.0cm}p{2.0cm}" + "S[table-format=1.1(2)]" * 3,
    caption=r"\acf{MAE} of the outlier correction algorithms for the different B-point extraction algorithms on the \textit{EmpkinS Dataset}. \ac{MAE} values are provided in milliseconds as (\(M\,\pm\,SD\)).",
    label="tab:outlier_correction_results_empkins",
)

# some manual post-processing
latex_output = latex_output.replace(r"\font-weightbold", r"\bfseries")
# rewrite the header rows: group title, removal of the extra index-name row, and bold column names
latex_output = latex_output.replace(
    r"\multicolumn{3}{r}{\bfseries Mean Absolute Error [ms]}",
    r"\multicolumn{3}{l}{\bfseries Outlier Correction Algorithm}",
)
latex_output = latex_output.replace(r"& Outlier Correction &", r"&  & ")
latex_output = latex_output.replace(r"Q-Peak Detection & B-Point Detection &  &  &  \\", r"")
latex_output = latex_output.replace(
    r"&  &  \bfseries For18 & \bfseries LinInt & \bfseries None",
    r"{\bfseries Q-peak\newline Algorithm} & {\bfseries B-point\newline Algorithm} & {\bfseries For18} & {\bfseries LinInt} & {\bfseries None}",
)

# ``write_text`` closes the file after writing (the previous ``open(...).write(...)`` left the handle open)
# and the explicit encoding keeps the output independent of the platform default.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_results_horizontal_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

### Effect of Pipeline Combination

In [None]:
# Q-peak-only challenge results on the EmpkinS dataset and their per-sample table.
results_q_peak_empkins = load_challenge_results_from_folder(
    result_path / "empkins_dataset_q_peak",
    index_cols_per_sample=["participant", "condition", "phase"],
)
# B-point-only challenge results on the EmpkinS dataset.
results_b_point_empkins = load_challenge_results_from_folder(
    result_path / "empkins_dataset_b_point",
    index_cols_per_sample=["participant", "condition", "phase"],
)

# Per-sample tables of both result sets.
results_b_point_per_sample_empkins = results_b_point_empkins.per_sample
results_q_peak_per_sample_empkins = results_q_peak_empkins.per_sample

In [None]:
# Stack the Q-peak-only, B-point-only and combined-pipeline per-sample results into one frame.
results_per_sample_total_empkins = pd.concat(
    [results_q_peak_per_sample_empkins, results_b_point_per_sample_empkins, results_per_sample_empkins]
)
# Last expression of the cell: show the combined frame.
results_per_sample_total_empkins

In [None]:
# Decompose the first selected pipeline into three index keys:
#   0: its Q-peak algorithm paired with the "b-point-reference" entry,
#   1: the "q-peak-reference" entry paired with its B-point algorithm,
#   2: the complete pipeline.
pep_pipeline_steps_empkins = [
    (selected_pipelines_for_plotting_empkins[0][0], "b-point-reference", "none"),
    ("q-peak-reference", selected_pipelines_for_plotting_empkins[0][1], "none"),
    selected_pipelines_for_plotting_empkins[0],
]
# Last expression of the cell: show the resulting keys.
pep_pipeline_steps_empkins

In [None]:
# Select the rows of each pipeline step; ``drop_level=False`` keeps the algorithm levels in the index.
q_peak_results_empkins = results_per_sample_total_empkins.xs(
    pep_pipeline_steps_empkins[0], level=algo_levels, drop_level=False
)
b_point_results_empkins = results_per_sample_total_empkins.xs(
    pep_pipeline_steps_empkins[1], level=algo_levels, drop_level=False
)
total_pipeline_results_empkins = results_per_sample_total_empkins.xs(
    pep_pipeline_steps_empkins[2], level=algo_levels, drop_level=False
)

# Stack the three selections and sort by index.
combined_results_empkins = pd.concat(
    [q_peak_results_empkins, b_point_results_empkins, total_pipeline_results_empkins]
).sort_index()
# Name the last index level "id" and remember all index level names before resetting the index.
combined_results_empkins.index = combined_results_empkins.index.set_names("id", level=-1)
index_levels = list(combined_results_empkins.index.names)
combined_results_empkins = combined_results_empkins.reset_index()
# Add a "pipeline" column: the three algorithm names joined with "_".
combined_results_empkins = combined_results_empkins.assign(
    pipeline=combined_results_empkins[algo_levels].agg("_".join, axis=1)
)
# Restore the index with "pipeline" as the new outermost level.
combined_results_empkins = combined_results_empkins.set_index(["pipeline", *index_levels])
# Last expression of the cell: show the combined frame.
combined_results_empkins

In [None]:
# Residual plots for the three pipeline steps in ``pep_pipeline_steps_empkins`` on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 6), sharey=True)

for i, (pipeline_step, ax) in enumerate(zip(pep_pipeline_steps_empkins, axs, strict=False)):
    # the upper limit is shown for every subplot except the first one
    show_upper_limit = i != 0
    fig, ax = residual_plot_pep(
        combined_results_empkins,
        algorithm=pipeline_step,
        alpha=0.5,
        show_upper_limit=show_upper_limit,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    ax.set_ylim([-150, 150])
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)

fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipeline_steps_individual_lowest_empkins.pdf"), transparent=True)

In [None]:
# Hard-coded index keys of a second pipeline (martinez2004 + drost2022), decomposed in the same way:
# Q-peak step, B-point step, complete pipeline.
pep_pipeline_steps_best_empkins = [
    ("martinez2004", "b-point-reference", "none"),
    ("q-peak-reference", "drost2022", "none"),
    ("martinez2004", "drost2022", "none"),
]
# Last expression of the cell: show the keys.
pep_pipeline_steps_best_empkins

In [None]:
# Select the rows of each pipeline step; ``drop_level=False`` keeps the algorithm levels in the index.
q_peak_results_empkins_best = results_per_sample_total_empkins.xs(
    pep_pipeline_steps_best_empkins[0], level=algo_levels, drop_level=False
)
b_point_results_empkins_best = results_per_sample_total_empkins.xs(
    pep_pipeline_steps_best_empkins[1], level=algo_levels, drop_level=False
)
total_pipeline_results_empkins_best = results_per_sample_total_empkins.xs(
    pep_pipeline_steps_best_empkins[2], level=algo_levels, drop_level=False
)

# Stack the three selections and sort by index.
combined_results_empkins_best = pd.concat(
    [q_peak_results_empkins_best, b_point_results_empkins_best, total_pipeline_results_empkins_best]
).sort_index()
# Name the last index level "id" and remember all index level names before resetting the index.
combined_results_empkins_best.index = combined_results_empkins_best.index.set_names("id", level=-1)
index_levels = list(combined_results_empkins_best.index.names)
combined_results_empkins_best = combined_results_empkins_best.reset_index()
# Add a "pipeline" column: the three algorithm names joined with "_".
combined_results_empkins_best = combined_results_empkins_best.assign(
    pipeline=combined_results_empkins_best[algo_levels].agg("_".join, axis=1)
)
# Restore the index with "pipeline" as the new outermost level.
combined_results_empkins_best = combined_results_empkins_best.set_index(["pipeline", *index_levels])
# Last expression of the cell: show the combined frame.
combined_results_empkins_best

In [None]:
# NOTE(review): ``dv`` is assigned here but not used anywhere in this cell.
dv = "absolute_error_per_sample_ms"

# Residual plots for the three pipeline steps in ``pep_pipeline_steps_best_empkins`` on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 6), sharey=True)

for i, (pipeline_step, ax) in enumerate(zip(pep_pipeline_steps_best_empkins, axs, strict=False)):
    # the upper limit is shown for every subplot except the first one
    show_upper_limit = i != 0
    fig, ax = residual_plot_pep(
        combined_results_empkins_best,
        algorithm=pipeline_step,
        alpha=0.5,
        show_upper_limit=show_upper_limit,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    ax.set_ylim([-150, 150])
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)

fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipeline_steps_overall_lowest_empkins.pdf"), transparent=True)

In [None]:
# Dependent variable of the paired plots.
dv = "error_per_sample_ms"

fig, axs = plt.subplots(ncols=2, figsize=(12, 4), sharey=True)

# Keep only the ``dv`` column, drop the individual algorithm levels (the "pipeline" level remains)
# and add a unique id per result row via the project helper.
data_plot_paired = add_unique_id_to_results_dataframe(
    combined_results_empkins[[dv]].droplevel(algo_levels),
    algo_levels="pipeline",
)
data_plot_paired_best = add_unique_id_to_results_dataframe(
    combined_results_empkins_best[[dv]].droplevel(algo_levels),
    algo_levels="pipeline",
)

# Left: Q-peak step vs. B-point step of the first pipeline.
paired_plot_error_pep_pipeline(
    data=data_plot_paired, pep_pipelines=[pep_pipeline_steps_empkins[:2]], dv=dv, axs=[axs[0]]
)

# Right: the same comparison for the second pipeline; the trailing ";" suppresses the cell output.
paired_plot_error_pep_pipeline(
    data=data_plot_paired_best, pep_pipelines=[pep_pipeline_steps_best_empkins[:2]], dv=dv, axs=[axs[1]]
);

In [None]:
# Show the improvement statistics (Q-peak step vs. B-point step) for both pipelines.
display(compute_improvement_pipeline(data_plot_paired, pep_pipeline_steps_empkins[:2]))
display(compute_improvement_pipeline(data_plot_paired_best, pep_pipeline_steps_best_empkins[:2]))

#### Best Overall Pipeline vs. Best Individual Algorithms

In [None]:
# NOTE(review): ``dv`` is assigned here but not used in this cell.
dv = "absolute_error_per_sample_ms"
# Union of both combined result frames with duplicated rows removed.
combined_results_empkins_both = pd.concat([combined_results_empkins, combined_results_empkins_best]).drop_duplicates()
# The two complete pipelines (last entry of each step list) that are compared below.
pipeline_differences_empkins = [pep_pipeline_steps_empkins[-1], pep_pipeline_steps_best_empkins[-1]]

#### Best Overall Pipeline vs. Best Individual Algorithms - Combined Plot (Residual & Paired Plot)

In [None]:
# Four-panel figure: residual plots on axes 0 and 2, paired plots on axes 1 and 3.
fig, axs = plt.subplots(ncols=4, figsize=(12, 5), sharey=False)

# ``axs[::2]`` selects axes 0 and 2 for the residual plots of the two complete pipelines.
for i, (pipeline_step, ax) in enumerate(zip(pipeline_differences_empkins, axs[::2], strict=False)):
    residual_plot_pep(
        combined_results_empkins_both,
        algorithm=pipeline_step,
        ax=ax,
        show_upper_limit=True,
        annotate_fontsize="small",
        annotate_bbox=True,
    )


# Data for the paired plots.
dv = "error_per_sample_ms"
data_plot_paired_both = add_unique_id_to_results_dataframe(
    combined_results_empkins_both[[dv]].droplevel(algo_levels), algo_levels="pipeline"
)

# Axis 1: Q-peak step vs. B-point step of the first pipeline.
paired_plot_error_pep_pipeline(
    data=data_plot_paired_both,
    pep_pipelines=[[pep_pipeline_steps_empkins[0], pep_pipeline_steps_empkins[1]]],
    dv=dv,
    colors=["#FDB735", "#8C9FB1", "#18B4F1"],
    axs=[axs[1]],
)

# Axis 3: Q-peak step vs. B-point step of the second pipeline.
paired_plot_error_pep_pipeline(
    data=data_plot_paired_both,
    pep_pipelines=[[pep_pipeline_steps_best_empkins[0], pep_pipeline_steps_best_empkins[1]]],
    dv=dv,
    colors=["#FDB735", "#8C9FB1", "#18B4F1"],
    axs=[axs[3]],
)

# Remove the per-axes titles of the residual plots and give both the same y-range.
axs[0].set_title(None)
axs[2].set_title(None)
axs[0].set_ylim([-225, 150])
axs[2].set_ylim([-225, 150])
# Label the two ticks of each paired plot with the renamed Q-peak and B-point algorithm.
axs[1].set_xticklabels(
    [
        f"Q-Peak:\n{rename_algorithms(pipeline_differences_empkins[0][0])}",
        f"B-Point:\n{rename_algorithms(pipeline_differences_empkins[0][1])}",
    ]
)
axs[3].set_xticklabels(
    [
        f"Q-Peak:\n{rename_algorithms(pipeline_differences_empkins[1][0])}",
        f"B-Point:\n{rename_algorithms(pipeline_differences_empkins[1][1])}",
    ]
)
axs[1].set_xlabel("PEP Pipeline Algorithms")
axs[3].set_xlabel("PEP Pipeline Algorithms")

# Bold headings centred above the left and right pair of subplots.
# NOTE(review): ``pipeline_differences_empkins[0]`` is titled "Overall Lowest" here, while the figure of the
# same pipeline's steps is saved elsewhere in this notebook as "..._individual_lowest_..." (and vice versa
# for index 1) — confirm which labelling is correct.
fig.text(
    x=0.25,
    y=0.95,
    s=f"PEP Pipeline (Overall Lowest): {' | '.join(rename_algorithms(pipeline_differences_empkins[0]))}",
    fontdict={"fontweight": "bold"},
    ha="center",
)
fig.text(
    x=0.75,
    y=0.95,
    s=f"PEP Pipeline (Individually Lowest): {' | '.join(rename_algorithms(pipeline_differences_empkins[1]))}",
    fontdict={"fontweight": "bold"},
    ha="center",
)
fig.align_xlabels()

# Restrict the layout to the lower 92.5 % of the figure height; the headings are placed at y=0.95.
fig.tight_layout(rect=(0, 0, 1, 0.925), pad=0)
# Save to the local plot folder and to the main image folder of the paper.
for path in [img_path, img_path_paper]:
    fig.savefig(path.joinpath("img_pep_pipelines_total_empkins.pdf"), transparent=True)

In [None]:
# Dependent variable of the paired plot below.
dv = "absolute_error_per_sample_ms"

# Union of both combined result frames with duplicated rows removed.
combined_results_empkins_both = pd.concat([combined_results_empkins, combined_results_empkins_best]).drop_duplicates()
# NOTE: this rebinds ``data_plot_paired_best`` (also assigned in an earlier cell) to the absolute-error data.
data_plot_paired_best = add_unique_id_to_results_dataframe(
    combined_results_empkins_both[[dv]].droplevel(algo_levels), algo_levels="pipeline"
)

# ``squeeze=False`` returns a 2D axes array even for a single subplot; its first row is passed as ``axs``.
fig, axs = plt.subplots(ncols=1, figsize=(6, 4), sharey=True, squeeze=False)
axs = axs[0]
# Paired plot comparing the two complete pipelines.
fig, axs = paired_plot_error_pep_pipeline(
    data=data_plot_paired_best, pep_pipelines=[pipeline_differences_empkins], dv=dv, axs=axs
)

## Guardian Dataset

In [None]:
# Create the Guardian dataset from the configured base path with ``use_cache`` and ``only_labeled`` enabled.
dataset_guardian = GuardianDataset(guardian_base_path, use_cache=True, only_labeled=True)
# Last expression of the cell: show the dataset as cell output.
dataset_guardian

In [None]:
# Load the challenge results of the combined (Q-peak + B-point) pipelines on the Guardian dataset.
# The per-sample results are indexed by participant and phase.
results_guardian = load_challenge_results_from_folder(
    result_path.joinpath("guardian_dataset_both_algorithms"),
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# Keep the per-sample results and the total aggregates under separate names for the cells below.
results_per_sample_guardian = results_guardian.per_sample
results_agg_total_guardian = results_guardian.agg_total
# Last expression of the cell: preview the first rows.
results_per_sample_guardian.head()

### Results Table

In [None]:
# Compute the PEP performance metrics per pipeline; the aggregated results are passed as ``num_heartbeats``.
metrics_guardian = compute_pep_performance_metrics(
    results_per_sample_guardian, num_heartbeats=results_agg_total_guardian
)
# Preview the first twenty rows and highlight the column-wise minimum of both error metrics in green.
metrics_guardian.head(n=20).style.highlight_min(
    subset=["Mean Absolute Error [ms]", "Mean Absolute Relative Error [%]"], props="background-color: LightGreen;"
)

In [None]:
# Build the result table for the Guardian pipelines and convert it to a LaTeX ``longtable``.
result_table = create_algorithm_result_table(metrics_guardian)

latex_output = convert_to_latex(
    result_table,
    collapse_index_columns=False,
    column_header_bold=True,
    environment="longtable",
    column_format="p{2.25cm}p{2.0cm}p{2.0cm}S[table-format=1.1(2)]S[table-format=1.1(2)]S[table-format=1.1(2)]p{2.0cm}",
    caption=r"Results of combined \ac{PEP} extraction pipelines on the \textit{Guardian Dataset}. The pipelines are sorted by the MAE in ascending order.",
    label="tab:b_point_results_guardian",
)

# some manual post-processing
# strip the acronym macro, tidy spacing, wrap the table in a ``small`` environment, and fix bold markup
latex_output = latex_output.replace(r"\ac", r"")
latex_output = latex_output.replace(r"Van13 ", r"Van13\,")
latex_output = latex_output.replace(r"\begin{longtable}", r"\begin{small}\begin{longtable}")
latex_output = latex_output.replace(r"\end{longtable}", r"\end{longtable}\end{small}")
latex_output = latex_output.replace(r"\font-weightbold ", r"\bfseries ")
# break the index column headers onto two lines
latex_output = latex_output.replace(r"{B-Point Detection}", r"{B-Point\newline Detection}")
latex_output = latex_output.replace(r"{Q-Peak Detection}", r"{Q-Peak\newline Detection}")
latex_output = latex_output.replace(r"{Outlier Correction}", r"{Outlier\newline Correction}")

# ``write_text`` closes the file after writing (the previous ``open(...).write(...)`` left the handle open)
# and the explicit encoding keeps the output independent of the platform default.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_results_guardian.tex").write_text(latex_output, encoding="utf-8")
print(latex_output)

### Plots

In [None]:
# Pipelines shown in the Guardian plots, each given as
# (q_peak_algorithm, b_point_algorithm, outlier_correction_algorithm).
selected_pipelines_for_plotting_guardian = [
    ("forouzanfar2018", "drost2022", "none"),
    ("vanlien2013-32-ms", "drost2022", "none"),
    ("forouzanfar2018", "arbol2017-third-derivative", "none"),
    ("forouzanfar2018", "lozano2007-linear-regression", "none"),
]

In [None]:
# Restrict the per-sample results to the selected pipelines (matched on the three algorithm levels).
results_guardian_plot = multi_xs(
    data=results_per_sample_guardian, keys=selected_pipelines_for_plotting_guardian, level=algo_levels
)
# Last expression of the cell: preview the first rows.
results_guardian_plot.head()

#### Absolute Error

In [None]:
# Box plot of the per-sample absolute error for the selected Guardian pipelines (means shown).
fig, ax = boxplot_algorithm_performance(
    results_guardian_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    figsize=(6, 5),
)

# Saved to the local plot folder only.
fig.savefig(img_path.joinpath("img_boxplot_pep_pipelines_mae_guardian.pdf"), transparent=True)

In [None]:
# Violin plot of the per-sample absolute error for the selected Guardian pipelines (not saved to disk).
fig, ax = violinplot_algorithm_performance(
    results_guardian_plot,
    metric="absolute_error_per_sample_ms",
    figsize=(6, 5),
)

#### Absolute Error (with and without Outlier)

In [None]:
# Two box plots of the absolute error side by side: left with fliers, right without.
fig, axs = plt.subplots(ncols=2, figsize=(10, 3))

boxplot_algorithm_performance(
    results_guardian_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    showfliers=True,
    width=0.9,
    title="PEP Pipeline Results – With Outlier",
    fig=fig,
    ax=axs[0],
)
boxplot_algorithm_performance(
    results_guardian_plot,
    metric="absolute_error_per_sample_ms",
    showmeans=True,
    showfliers=False,
    width=0.9,
    title="PEP Pipeline Results – Without Outlier",
    fig=fig,
    ax=axs[1],
)
# Remove the x-axis labels from both subplots.
for ax in axs:
    ax.set_xlabel(None)
fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_boxplot_pep_pipelines_mae_with_without_outlier_guardian.pdf"), transparent=True)

#### Error

In [None]:
# Violin plot of the per-sample error (``error_per_sample_ms``) for the selected Guardian pipelines.
fig, ax = violinplot_algorithm_performance(
    results_guardian_plot,
    metric="error_per_sample_ms",
    figsize=(6, 5),
)

#### Error per Participant

In [None]:
# Error grouped by participant, restricted to the selected pipelines (columns) and rounded to two decimals.
error_per_participant_guardian = multi_xs(
    get_error_by_group(results_per_sample_guardian, grouper="participant"),
    selected_pipelines_for_plotting_guardian,
    level=algo_levels,
    axis=1,
).round(2)
# Rename the column labels: algorithm names first, then metric names.
error_per_participant_guardian = error_per_participant_guardian.rename(columns=rename_algorithms)
error_per_participant_guardian = error_per_participant_guardian.rename(columns=rename_metrics)

# Last expression of the cell: show the table with the column-wise maxima highlighted.
error_per_participant_guardian.style.highlight_max(props="background-color: Pink;")

In [None]:
# Convert the per-participant error table to LaTeX. The styler highlights the column-wise maxima and
# LaTeX-escapes the row index labels.
latex_output = convert_to_latex(
    error_per_participant_guardian.style.highlight_max(props="background-color: Pink;").format_index(
        escape="latex", axis=0
    ),
    collapse_index_columns=False,
    column_header_bold=True,
    # one ``S`` column per data column of the Guardian table (previously sized from the EmpkinS table,
    # which only matched as long as both tables had the same number of columns)
    column_format="p{3.5cm}" + "S[table-format=2.2]" * len(error_per_participant_guardian.columns),
    caption=r"Mean Absolute Error of selected PEP extraction pipelines on the \textit{Guardian Dataset} per participant. The values with the highest errors are highlighted in red.",
    label="tab:pep_pipeline_results_per_participant_guardian",
)

# some manual post-processing: smaller font, readable header names, and siunitx rounding to two decimals
latex_output = latex_output.replace(r"\begin{table}[ht]", r"\begin{table}[ht]\footnotesize")
latex_output = latex_output.replace(r"q_peak_algorithm", r"\bfseries Q-peak Algorithm")
latex_output = latex_output.replace(r"b_point_algorithm", r"\bfseries B-point Algorithm")
latex_output = latex_output.replace(r"outlier_correction_algorithm", r"\bfseries Outlier Correction Algorithm")
latex_output = latex_output.replace(r"{participant}", r"{Participant}")
latex_output = latex_output.replace(r"{metric}", r"{}")
latex_output = latex_output.replace(r"{\bfseries mean}", r"{Mean}")
latex_output = latex_output.replace(r"{\bfseries std}", r"{SD}")
latex_output = latex_output.replace(r"{std}", r"{SD}")
latex_output = latex_output.replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")

# ``write_text`` closes the file after writing (the previous ``open(...).write(...)`` left the handle open)
# and the explicit encoding keeps the output independent of the platform default.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_results_per_participant_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

#### Residual Plots

In [None]:
# Pipelines shown in the Guardian residual plots (one subplot each), given as
# (q_peak_algorithm, b_point_algorithm, outlier_correction_algorithm).
selected_pipelines_for_residual_guardian = [
    ("forouzanfar2018", "drost2022", "none"),
    # ("vanlien2013-32-ms", "drost2022", "none"),
    ("forouzanfar2018", "arbol2017-third-derivative", "none"),
    ("forouzanfar2018", "lozano2007-linear-regression", "none"),
]

In [None]:
# One residual plot per selected pipeline on a shared y-axis.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for i, ax in enumerate(axs):
    fig, ax = residual_plot_pep(
        results_guardian_plot,
        selected_pipelines_for_residual_guardian[i],
        alpha=0.5,
        show_upper_limit=True,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    # keep the y-label only on the left-most subplot
    if i != 0:
        ax.set_ylabel(None)
# y-axes are shared, so setting the limits on the first axis applies to all subplots
axs[0].set_ylim([-125, 225])

fig.tight_layout()

# Save to the local plot folder and to the supplementary material of the paper.
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipelines_guardian.pdf"), transparent=True)

In [None]:
# Per-participant residual plots of the selected Guardian pipelines (one panel per pipeline, shared y-axis).
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for panel_idx, (pipeline, panel) in enumerate(zip(selected_pipelines_for_residual_guardian, axs, strict=False)):
    fig, panel = residual_plot_pep_participant(
        results_guardian_plot,
        pipeline,
        ax=panel,
        alpha=0.5,
        show_upper_limit=True,
        annotate_bbox=True,
        annotate_fontsize="small",
    )
    # only the left-most panel keeps its y-axis label
    if panel_idx > 0:
        panel.set_ylabel(None)

# the y-axis is shared, so setting the limits on the first panel is sufficient
axs[0].set_ylim([-125, 225])
fig.tight_layout()

# export the figure to both output folders
for out_dir in (img_path, suppl_img_path_paper):
    fig.savefig(
        out_dir.joinpath("img_residual_plots_pep_pipelines_per_participant_guardian.pdf"), transparent=True
    )

In [None]:
# Per-phase residual plots of the selected Guardian pipelines (one panel per pipeline, shared y-axis).
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for panel_idx, (pipeline, panel) in enumerate(zip(selected_pipelines_for_residual_guardian, axs, strict=False)):
    fig, panel = residual_plot_pep_phase(
        results_guardian_plot,
        pipeline,
        ax=panel,
        alpha=0.5,
        show_upper_limit=True,
        annotate_bbox=True,
        annotate_fontsize="small",
    )
    # only the left-most panel keeps its y-axis label
    if panel_idx > 0:
        panel.set_ylabel(None)

# the y-axis is shared, so setting the limits on the first panel is sufficient
axs[0].set_ylim([-125, 225])

# NOTE(review): unlike the other residual-plot cells, no fig.tight_layout() is called before saving -
# confirm that this is intended.
for out_dir in (img_path, suppl_img_path_paper):
    fig.savefig(out_dir.joinpath("img_residual_plots_pep_pipelines_per_phase_guardian.pdf"), transparent=True)

In [None]:
# Heart-rate residual plots of the selected Guardian pipelines (one panel per pipeline, shared y-axis).
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for panel_idx, (pipeline, panel) in enumerate(zip(selected_pipelines_for_residual_guardian, axs, strict=False)):
    fig, panel = residual_plot_pep_heart_rate(
        results_guardian_plot,
        pipeline,
        ax=panel,
        alpha=0.5,
        show_upper_limit=True,
        annotate_bbox=True,
        annotate_fontsize="small",
    )
    # only the left-most panel keeps its y-axis label
    if panel_idx > 0:
        panel.set_ylabel(None)

# the y-axis is shared, so setting the limits on the first panel is sufficient
axs[0].set_ylim([-125, 225])

# NOTE(review): unlike the other residual-plot cells, no fig.tight_layout() is called before saving -
# confirm that this is intended.
for out_dir in (img_path, suppl_img_path_paper):
    fig.savefig(out_dir.joinpath("img_residual_plots_pep_pipelines_heart_rate_guardian.pdf"), transparent=True)

In [None]:
# Regression plots of the absolute per-sample error against heart rate, one panel per selected pipeline.
fig, axs = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)

for panel_idx, (pipeline, panel) in enumerate(zip(selected_pipelines_for_residual_guardian, axs, strict=False)):
    fig, panel = regplot_error_heart_rate(
        results_per_sample_guardian,
        pipeline,
        ax=panel,
        error_metric="absolute_error_per_sample_ms",
        add_corr_coeff=True,
    )
    # only the left-most panel keeps its y-axis label
    if panel_idx > 0:
        panel.set_ylabel(None)

fig.tight_layout()

# export the figure to both output folders
for out_dir in (img_path, suppl_img_path_paper):
    fig.savefig(out_dir.joinpath("img_pep_pipelines_error_heart_rate_guardian.pdf"), transparent=True)

#### Effect of Outlier Correction on Estimation Error

In [None]:
# Within every (Q-peak, B-point) combination, order the outlier-correction level as listed in `outlier_algos`.
metrics_guardian_outlier = metrics_guardian.groupby(
    ["q_peak_algorithm", "b_point_algorithm"],
    group_keys=False,
).apply(lambda group: group.reindex(outlier_algos, level="outlier_correction_algorithm"))

# Preview of the first ten rows, styled by `styling.highlight_outlier_improvement` on the two listed columns.
metrics_guardian_outlier.head(10).style.apply(
    styling.highlight_outlier_improvement,
    subset=["Mean Absolute Error [ms]", "Invalid PEPs"],
)

#### Horizontal Table

In [None]:
# Wide table of the mean absolute error: the outlier-correction algorithms are moved from the index to
# the columns, the column levels are reordered and sorted, and the outlier-correction level is put
# into the order given by `outlier_algos`.
metric_guardian_unstack = (
    metrics_guardian_outlier[["Mean Absolute Error [ms]"]]
    .unstack("outlier_correction_algorithm")
    .reorder_levels([0, 2, 1], axis=1)
    .sort_index(axis=1)
    .reindex(outlier_algos, level="outlier_correction_algorithm", axis=1)
)

# Print the row-wise minimum over all columns whose key contains "mean" in bold.
metric_guardian_unstack_style = metric_guardian_unstack.style.highlight_min(
    subset=[column for column in metric_guardian_unstack.columns if "mean" in column],
    axis=1,
    props="font-weight: bold;",
)
metric_guardian_unstack_style

#### Horizontal Table to LaTeX

In [None]:
# Build the horizontal result table: one column per outlier correction algorithm, MAE only.
result_table = create_algorithm_result_table(metrics_guardian_outlier[["Mean Absolute Error [ms]"]])
result_table = result_table.unstack("Outlier Correction")
result_table = result_table[["Mean Absolute Error [ms]"]]

latex_output = convert_to_latex(
    result_table.style.apply(styling.highlight_min_uncertainty, axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    siunitx=False,
    environment="longtable",
    column_format="p{2.0cm}p{2.0cm}" + "S[table-format=1.1(2)]" * 3,
    caption=r"\acf{MAE} of the outlier correction algorithms for the different B-point extraction algorithms on the \textit{Guardian Dataset}. \ac{MAE} values are provided in milliseconds as (\(M\,\pm\,SD\)).",
    label="tab:outlier_correction_results_guardian",
)

# Manual post-processing of the generated LaTeX source. The replacements run in the order listed.
# Turn the exported "font-weight: bold" style into a valid LaTeX command.
latex_output = latex_output.replace(r"\font-weightbold", r"\bfseries")
# Rename the spanning header from the metric name to "Outlier Correction Algorithm".
latex_output = latex_output.replace(
    r"\multicolumn{3}{r}{\bfseries Mean Absolute Error [ms]}",
    r"\multicolumn{3}{l}{\bfseries Outlier Correction Algorithm}",
)
latex_output = latex_output.replace(r"& Outlier Correction &", r"&  & ")
# Drop the separate index-name header row; the index names are merged into the column header row below.
latex_output = latex_output.replace(r"Q-Peak Detection & B-Point Detection &  &  &  \\", r"")
latex_output = latex_output.replace(
    r"&  &  \bfseries For18 & \bfseries LinInt & \bfseries None",
    r"{\bfseries Q-peak\newline Algorithm} & {\bfseries B-point\newline Algorithm} & {\bfseries For18} & {\bfseries LinInt} & {\bfseries None}",
)

# `write_text` opens, writes and closes the file; the previous `.open(...).write(...)` leaked the handle.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_results_horizontal_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

### Effect of Pipeline Combination

In [None]:
# Load the challenge results stored in the `guardian_dataset_q_peak` and `guardian_dataset_b_point`
# result folders and keep a handle on their per-sample results.
results_q_peak_guardian = load_challenge_results_from_folder(
    result_path / "guardian_dataset_q_peak", index_cols_per_sample=["participant", "phase"]
)
results_q_peak_per_sample_guardian = results_q_peak_guardian.per_sample

results_b_point_guardian = load_challenge_results_from_folder(
    result_path / "guardian_dataset_b_point", index_cols_per_sample=["participant", "phase"]
)
results_b_point_per_sample_guardian = results_b_point_guardian.per_sample

In [None]:
# Stack the per-sample results of the Q-peak run, the B-point run and the full pipelines row-wise.
results_per_sample_total_guardian = pd.concat(
    objs=[
        results_q_peak_per_sample_guardian,
        results_b_point_per_sample_guardian,
        results_per_sample_guardian,
    ]
)
results_per_sample_total_guardian

In [None]:
# Decompose the first pipeline selected for plotting into its steps:
#   1. its Q-peak algorithm combined with the B-point reference,
#   2. the Q-peak reference combined with its B-point algorithm,
#   3. the full pipeline itself.
_full_pipeline_guardian = selected_pipelines_for_plotting_guardian[0]
pep_pipeline_steps_guardian = [
    (_full_pipeline_guardian[0], "b-point-reference", "none"),
    ("q-peak-reference", _full_pipeline_guardian[1], "none"),
    _full_pipeline_guardian,
]
pep_pipeline_steps_guardian

In [None]:
# Select the per-sample results of the three pipeline steps (in the order of `pep_pipeline_steps_guardian`).
q_peak_results_guardian, b_point_results_guardian, total_pipeline_results_guardian = (
    results_per_sample_total_guardian.xs(pipeline_step, level=algo_levels, drop_level=False)
    for pipeline_step in pep_pipeline_steps_guardian
)

combined_results_guardian = pd.concat(
    [q_peak_results_guardian, b_point_results_guardian, total_pipeline_results_guardian]
).sort_index()
# name the last index level "id"
combined_results_guardian.index = combined_results_guardian.index.set_names("id", level=-1)
index_levels = list(combined_results_guardian.index.names)

# Add a "pipeline" level (the algorithm names joined by "_") in front of the existing index levels.
combined_results_guardian = combined_results_guardian.reset_index()
combined_results_guardian["pipeline"] = combined_results_guardian[algo_levels].agg("_".join, axis=1)
combined_results_guardian = combined_results_guardian.set_index(["pipeline"] + index_levels)
combined_results_guardian

In [None]:
# Residual plots of the individual pipeline steps in `pep_pipeline_steps_guardian` (one panel per step).
fig, axs = plt.subplots(ncols=3, figsize=(12, 6), sharey=True)

for panel_idx, (pipeline_step, panel) in enumerate(zip(pep_pipeline_steps_guardian, axs, strict=False)):
    # the upper limit is shown in every panel except the first one
    show_upper_limit = panel_idx > 0
    fig, panel = residual_plot_pep(
        combined_results_guardian,
        algorithm=pipeline_step,
        ax=panel,
        alpha=0.5,
        show_upper_limit=show_upper_limit,
        annotate_bbox=True,
        annotate_fontsize="small",
    )
    panel.set_ylim([-110, 120])
    # only the left-most panel keeps its y-axis label
    if panel_idx > 0:
        panel.set_ylabel(None)

fig.tight_layout()

# export the figure to both output folders
for out_dir in (img_path, suppl_img_path_paper):
    fig.savefig(
        out_dir.joinpath("img_residual_plots_pep_pipeline_steps_individual_lowest_guardian.pdf"), transparent=True
    )

In [None]:
# Pipeline steps built from a fixed Q-peak and a fixed B-point algorithm:
# each algorithm paired with the reference of the other step, followed by their combination.
_best_q_peak_guardian = "martinez2004"
_best_b_point_guardian = "lozano2007-linear-regression"
pep_pipeline_steps_best_guardian = [
    (_best_q_peak_guardian, "b-point-reference", "none"),
    ("q-peak-reference", _best_b_point_guardian, "none"),
    (_best_q_peak_guardian, _best_b_point_guardian, "none"),
]
pep_pipeline_steps_best_guardian

In [None]:
# Select the per-sample results of the three pipeline steps (in the order of `pep_pipeline_steps_best_guardian`).
q_peak_results_guardian_best, b_point_results_guardian_best, total_pipeline_results_guardian_best = (
    results_per_sample_total_guardian.xs(pipeline_step, level=algo_levels, drop_level=False)
    for pipeline_step in pep_pipeline_steps_best_guardian
)

combined_results_guardian_best = pd.concat(
    [q_peak_results_guardian_best, b_point_results_guardian_best, total_pipeline_results_guardian_best]
).sort_index()
# name the last index level "id"
combined_results_guardian_best.index = combined_results_guardian_best.index.set_names("id", level=-1)
index_levels = list(combined_results_guardian_best.index.names)

# Add a "pipeline" level (the algorithm names joined by "_") in front of the existing index levels.
combined_results_guardian_best = combined_results_guardian_best.reset_index()
combined_results_guardian_best["pipeline"] = combined_results_guardian_best[algo_levels].agg("_".join, axis=1)
combined_results_guardian_best = combined_results_guardian_best.set_index(["pipeline"] + index_levels)
combined_results_guardian_best

In [None]:
dv = "absolute_error_per_sample_ms"

# Residual plots of the pipeline steps in `pep_pipeline_steps_best_guardian` (one panel per step).
# This cell previously plotted the EmpkinS variables (`pep_pipeline_steps_best_empkins`,
# `combined_results_empkins_best`) although the figure is saved as a Guardian result.
# NOTE(review): the file name says "overall_lowest", while the joint result table labels this
# pipeline "IL" (individually lowest) - confirm that the file names of the two step figures are not swapped.
fig, axs = plt.subplots(ncols=3, figsize=(12, 6), sharey=True)

for i, (pipeline_step, ax) in enumerate(zip(pep_pipeline_steps_best_guardian, axs, strict=False)):
    # the upper limit is shown in every panel except the first one
    show_upper_limit = i != 0
    fig, ax = residual_plot_pep(
        combined_results_guardian_best,
        algorithm=pipeline_step,
        alpha=0.5,
        show_upper_limit=show_upper_limit,
        ax=ax,
        annotate_fontsize="small",
        annotate_bbox=True,
    )
    ax.set_ylim([-110, 120])
    # only the left-most panel keeps its y-axis label
    if i != 0:
        ax.set_ylabel(None)

fig.tight_layout()

# export the figure to both output folders
for path in [img_path, suppl_img_path_paper]:
    fig.savefig(path.joinpath("img_residual_plots_pep_pipeline_steps_overall_lowest_guardian.pdf"), transparent=True)

In [None]:
dv = "error_per_sample_ms"

fig, axs = plt.subplots(ncols=2, figsize=(12, 6), sharey=True)

# Per-sample errors with the algorithm levels dropped, so that only the "pipeline" level identifies the algorithms.
data_plot_paired = add_unique_id_to_results_dataframe(
    combined_results_guardian[[dv]].droplevel(algo_levels),
    algo_levels="pipeline",
)
data_plot_paired_best = add_unique_id_to_results_dataframe(
    combined_results_guardian_best[[dv]].droplevel(algo_levels),
    algo_levels="pipeline",
)

# One paired plot per pipeline: its first step against its second step.
for paired_data, pipeline_steps, panel in (
    (data_plot_paired, pep_pipeline_steps_guardian, axs[0]),
    (data_plot_paired_best, pep_pipeline_steps_best_guardian, axs[1]),
):
    paired_plot_error_pep_pipeline(
        data=paired_data,
        pep_pipelines=[[pipeline_steps[0], pipeline_steps[1]]],
        dv=dv,
        axs=[panel],
    )

In [None]:
# Show the result of `compute_improvement_pipeline` for the first two steps of both pipelines.
for paired_data, pipeline_steps in (
    (data_plot_paired, pep_pipeline_steps_guardian),
    (data_plot_paired_best, pep_pipeline_steps_best_guardian),
):
    display(compute_improvement_pipeline(paired_data, [pipeline_steps[0], pipeline_steps[1]]))

#### Best Overall Pipeline vs. Best Individual Algorithms

In [None]:
dv = "absolute_error_per_sample_ms"

# Results of both pipeline decompositions in one frame, without duplicated rows.
combined_results_guardian_both = pd.concat([combined_results_guardian, combined_results_guardian_best])
combined_results_guardian_both = combined_results_guardian_both.drop_duplicates()

# The two full pipelines to compare, i.e. the last entry of each step list.
pipeline_differences_guardian = [
    pipeline_steps[-1] for pipeline_steps in (pep_pipeline_steps_guardian, pep_pipeline_steps_best_guardian)
]

In [None]:
# Residual plots of the two full pipelines in `pipeline_differences_guardian`, side by side.
fig, axs = plt.subplots(ncols=2, figsize=(12, 6), sharey=True)

for pipeline_step, ax in zip(pipeline_differences_guardian, axs, strict=False):
    residual_plot_pep(combined_results_guardian_both, algorithm=pipeline_step, ax=ax, show_upper_limit=True)

# Each title must name the pipeline plotted on that axis: axs[k] shows pipeline_differences_guardian[k].
# The indices were swapped before, so every panel was titled with the algorithms of the other one.
# The labels now agree with the combined figure (index 0 = overall lowest, index 1 = individually lowest).
axs[0].set_title(
    f"PEP Pipeline (Overall Lowest):\n{' | '.join(rename_algorithms(pipeline_differences_guardian[0]))}",
    fontweight="bold",
)
axs[1].set_title(
    f"PEP Pipeline (Individually Lowest):\n{' | '.join(rename_algorithms(pipeline_differences_guardian[1]))}",
    fontweight="bold",
)
axs[-1].set_ylabel(None)
# the y-axis is shared, so setting the limits on the first panel is sufficient
axs[0].set_ylim([-200, 200])

fig.tight_layout()

#### Best Overall Pipeline vs. Best Individual Algorithms - Combined Plot (Residual & Paired Plot)

In [None]:
# Combined figure with four panels: for each of the two pipelines in `pipeline_differences_guardian`
# a residual plot (panels 0 and 2) followed by a paired plot of its individual steps (panels 1 and 3).
fig, axs = plt.subplots(ncols=4, figsize=(12, 5), sharey=False)

for pipeline_step, ax in zip(pipeline_differences_guardian, axs[::2], strict=False):
    residual_plot_pep(
        combined_results_guardian_both,
        algorithm=pipeline_step,
        ax=ax,
        show_upper_limit=True,
        annotate_bbox=True,
        annotate_fontsize="small",
    )

dv = "error_per_sample_ms"
data_plot_paired_both = add_unique_id_to_results_dataframe(
    combined_results_guardian_both[[dv]].droplevel(algo_levels), algo_levels="pipeline"
)

paired_plot_error_pep_pipeline(
    data=data_plot_paired_both,
    pep_pipelines=[[pep_pipeline_steps_guardian[0], pep_pipeline_steps_guardian[1]]],
    dv=dv,
    colors=["#FDB735", "#8C9FB1", "#18B4F1"],
    axs=[axs[1]],
)

paired_plot_error_pep_pipeline(
    data=data_plot_paired_both,
    pep_pipelines=[[pep_pipeline_steps_best_guardian[0], pep_pipeline_steps_best_guardian[1]]],
    dv=dv,
    colors=["#FDB735", "#8C9FB1", "#18B4F1"],
    axs=[axs[3]],
)

axs[0].set_title(None)
axs[2].set_title(None)
axs[0].set_ylim([-225, 150])
axs[2].set_ylim([-225, 150])
# The tick labels name the Q-peak and B-point algorithm of the Guardian pipeline shown in each paired plot.
# They were previously built from `pipeline_differences_empkins`, i.e. from the EmpkinS pipelines.
axs[1].set_xticklabels(
    [
        f"Q-Peak:\n{rename_algorithms(pipeline_differences_guardian[0][0])}",
        f"B-Point:\n{rename_algorithms(pipeline_differences_guardian[0][1])}",
    ]
)
axs[3].set_xticklabels(
    [
        f"Q-Peak:\n{rename_algorithms(pipeline_differences_guardian[1][0])}",
        f"B-Point:\n{rename_algorithms(pipeline_differences_guardian[1][1])}",
    ]
)
axs[1].set_xlabel("PEP Pipeline Algorithms")
axs[3].set_xlabel("PEP Pipeline Algorithms")


# One heading per pipeline, centered above its two panels (left half / right half of the figure).
fig.text(
    x=0.25,
    y=0.95,
    s=f"PEP Pipeline (Overall Lowest): {' | '.join(rename_algorithms(pipeline_differences_guardian[0]))}",
    fontdict={"fontweight": "bold"},
    ha="center",
)
fig.text(
    x=0.75,
    y=0.95,
    s=f"PEP Pipeline (Individually Lowest): {' | '.join(rename_algorithms(pipeline_differences_guardian[1]))}",
    fontdict={"fontweight": "bold"},
    ha="center",
)
fig.align_xlabels()

# keep the top 7.5 % of the figure free for the two headings
fig.tight_layout(rect=(0, 0, 1, 0.925), pad=0)
for path in [img_path, img_path_paper]:
    fig.savefig(path.joinpath("img_pep_pipelines_total_guardian.pdf"), transparent=True)

In [None]:
dv = "absolute_error_per_sample_ms"

# Results of both pipeline decompositions in one frame, without duplicated rows.
combined_results_guardian_both = pd.concat([combined_results_guardian, combined_results_guardian_best])
combined_results_guardian_both = combined_results_guardian_both.drop_duplicates()
# NOTE: this rebinds `data_plot_paired_best` to the absolute errors of the combined frame.
data_plot_paired_best = add_unique_id_to_results_dataframe(
    combined_results_guardian_both[[dv]].droplevel(algo_levels),
    algo_levels="pipeline",
)

# `squeeze=False` returns a 2D array of axes; its first row is passed on as the `axs` argument.
fig, axs = plt.subplots(ncols=1, figsize=(6, 6), sharey=True, squeeze=False)
axs = axs[0]
paired_plot_error_pep_pipeline(
    data=data_plot_paired_best,
    pep_pipelines=[pipeline_differences_guardian],
    dv=dv,
    axs=axs,
);

## Combined Output


### Joint Result Table

In [None]:
# Per dataset: the first row of the metrics table plus the row of the last entry of the "best" step list.
result_table_empkins = create_algorithm_result_table(
    pd.concat(
        [
            metrics_empkins.head(1),
            metrics_empkins.loc[[pep_pipeline_steps_best_empkins[-1]]],
        ]
    )
)
result_table_guardian = create_algorithm_result_table(
    pd.concat(
        [
            metrics_guardian.head(1),
            metrics_guardian.loc[[pep_pipeline_steps_best_guardian[-1]]],
        ]
    )
)

# Stack both tables under a "Dataset" level and add the pipeline type as second index level
# (row order per dataset: "OL", then "IL").
result_table_combined = (
    pd.concat(
        {"EmpkinS Dataset": result_table_empkins, "Guardian Dataset": result_table_guardian},
        names=["Dataset"],
    )
    .assign(Type=["OL", "IL", "OL", "IL"])
    .set_index("Type", append=True)
    .reorder_levels([0, -1, 1, 2, 3])
)
result_table_combined

In [None]:
# Export the joint result table of both datasets as a two-column-wide LaTeX table.
latex_output = convert_to_latex(
    result_table_combined,
    collapse_index_columns=False,
    column_header_bold=True,
    environment="table*",
    column_format="p{1.60cm}p{1.00cm}p{1.60cm}p{1.60cm}p{1.60cm}S[table-format=1.1(2)]S[table-format=1.1(2)]S[table-format=1.1(2)]p{2.0cm}",
    caption=r"Results of selected combined \ac{PEP} extraction pipelines per dataset. The different pipeline types are: \textit{OL = overall lowest}, i.e., the combined \ac{PEP} pipelines with the overall lowest \ac{MAE} on the \textit{EmpkinS Dataset} and the \textit{Guardian Dataset}, respectively; \textit{IL = individually lowest}, i.e., the combined \ac{PEP} pipeline consisting of the algorithms with the individually lowest \ac{MAE}. Results of all \ac{PEP} pipelines are available in the Supplementary Material, Tables~S6 (\textit{EmpkinS Dataset}) and~S14 (\textit{Guardian Dataset}).",
    label="tab:pep_pipelines_combined",
)

# Manual post-processing of the generated LaTeX source. The replacements run in the order listed.
# Remove the \cline rules directly in front of \bottomrule.
latex_output = latex_output.replace(
    r"""
\cline{1-9} \cline{2-9} \cline{3-9} \cline{4-9}
\bottomrule""",
    r"\bottomrule",
)
latex_output = latex_output.replace(r"Van13 ", r"Van13\,")
latex_output = latex_output.replace(r"Invalid\newline ", r"Invalid ")
latex_output = latex_output.replace("{Dataset}", r"{\bfseries Dataset}")
# Put the bold index names into the empty leading cells of the column header row ...
latex_output = latex_output.replace(
    r"{} & {} & {} & {} & {} & ",
    r"{\bfseries Dataset} & {\bfseries Type} & {\bfseries Q-peak\newline Algorithm} & {\bfseries B-point\newline Algorithm} & {\bfseries Outlier\newline Correction} & ",
)
# ... and drop the separate index-name row, which is redundant afterwards.
latex_output = latex_output.replace(
    r"{\bfseries Dataset} & {Type} & {Q-Peak Detection} & {B-Point Detection} & {Outlier Correction} & {} & {} & {} & {} \\",
    "",
)
# Shorten the dataset names in the row labels.
latex_output = latex_output.replace("{*}{EmpkinS Dataset}", r"{*}{EmpkinS}")
latex_output = latex_output.replace("{*}{Guardian Dataset}", r"{*}{Guardian}")

# `write_text` opens, writes and closes the file; the previous `.open(...).write(...)` leaked the handle.
tab_path_paper.joinpath("tab_pep_pipeline_results_combined.tex").write_text(latex_output, encoding="utf-8")
print(latex_output)