# Analysis Rater Influence - EmpkinS and Guardian Datasets

## Setup and Helper Functions

### Imports

In [None]:
import json
import re
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from fau_colors import cmaps, register_fausans_font

from pepbench.annotations import load_annotations_from_dataset
from pepbench.annotations.stats import add_annotation_agreement_to_results_dataframe
from pepbench.data_handling import (
    compute_pep_performance_metrics,
    merge_result_metrics_from_multiple_annotators,
    merge_results_per_sample_from_different_annotators,
)
from pepbench.datasets import EmpkinsDataset, GuardianDataset
from pepbench.export import (
    convert_to_latex,
)
from pepbench.io import load_challenge_results_from_folder
from pepbench.utils import rename_algorithms, rename_metrics

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Make the FAU Sans font available and discard any figures left over from a previous run.
register_fausans_font()
plt.close("all")

# Use the light faculty colors as the default seaborn palette.
palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Matplotlib defaults shared by all figures of this notebook.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        "font.sans-serif": "FAUSans Office",
    }
)

# Display the palette as the cell output.
palette

In [None]:
# Repository root, three directory levels above the notebook's working directory.
root_path = Path("..", "..", "..")

In [None]:
# Key of the deployment-specific section inside config.json.
deploy_type = "local"

# read_text() opens, reads, and closes the file, so no file handle is left dangling
# (json.load(path.open()) never closed the handle).
config_dict = json.loads(root_path.joinpath("config.json").read_text(encoding="utf-8"))

# Base folders of the two datasets for the selected deployment.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(empkins_base_path)

### Input Paths

In [None]:
# Folder holding the stored challenge results that this notebook loads.
result_path = root_path / "results"

### Output Paths

In [None]:
# Location of the paper folder, read from paper_path.json.
# read_text() closes the file after reading (json.load(path.open()) leaked the handle).
paper_path_config = json.loads(root_path.joinpath("paper_path.json").read_text(encoding="utf-8"))
paper_path = Path(paper_path_config["paper_path"])

# Local export folders.
export_path = root_path.joinpath("exports")
img_path = export_path.joinpath("plots")
stats_path = export_path.joinpath("stats")

# Export folders inside the paper folder (main document and supplementary material).
img_path_paper = paper_path.joinpath("img")
tab_path_paper = paper_path.joinpath("tab")
suppl_img_path_paper = paper_path.joinpath("supplementary_material/img")
suppl_tab_path_paper = paper_path.joinpath("supplementary_material/tab")

# Make sure every folder that is read from or written to exists.
bp.utils.file_handling.mkdirs(
    [
        result_path,
        export_path,
        img_path,
        stats_path,
        img_path_paper,
        tab_path_paper,
        suppl_img_path_paper,
        suppl_tab_path_paper,
    ]
)

## EmpkinS Dataset

In [None]:
# EmpkinS dataset restricted to labeled data, using the labels of rater 01.
dataset_empkins_01 = EmpkinsDataset(empkins_base_path, use_cache=True, only_labeled=True, label_type="rater_01")
dataset_empkins_01

In [None]:
# Same dataset, but using the labels of rater 02.
dataset_empkins_02 = EmpkinsDataset(empkins_base_path, use_cache=True, only_labeled=True, label_type="rater_02")
dataset_empkins_02

### Load Data

#### Q-Peak

In [None]:
# Q-peak challenge results on EmpkinS, evaluated against rater 01.
results_q_peak_empkins_01 = load_challenge_results_from_folder(
    result_path / "empkins_dataset_q_peak" / "rater_01",
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# Q-peak challenge results on EmpkinS, evaluated against rater 02.
results_q_peak_empkins_02 = load_challenge_results_from_folder(
    result_path / "empkins_dataset_q_peak" / "rater_02",
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# Drop index levels 1 and 2 from the per-sample and aggregated Q-peak results of both raters.
# NOTE(review): presumably these are the B-point and outlier-correction algorithm levels,
# which carry no information for Q-peak-only results - confirm against the result files.
results_per_sample_q_peak_empkins_01 = results_q_peak_empkins_01.per_sample.droplevel([1, 2])
results_agg_total_q_peak_empkins_01 = results_q_peak_empkins_01.agg_total.droplevel([1, 2])

results_per_sample_q_peak_empkins_02 = results_q_peak_empkins_02.per_sample.droplevel([1, 2])
results_agg_total_q_peak_empkins_02 = results_q_peak_empkins_02.agg_total.droplevel([1, 2])

# Preview of the rater 02 per-sample results.
results_per_sample_q_peak_empkins_02.head()

#### B-Point

In [None]:
# B-point challenge results on EmpkinS, evaluated against rater 01.
results_b_point_empkins_01 = load_challenge_results_from_folder(
    result_path / "empkins_dataset_b_point" / "rater_01",
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# B-point challenge results on EmpkinS, evaluated against rater 02.
results_b_point_empkins_02 = load_challenge_results_from_folder(
    result_path / "empkins_dataset_b_point" / "rater_02",
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# Drop index level 0 from the per-sample and aggregated B-point results of both raters.
# NOTE(review): presumably this is the Q-peak algorithm level, which is not needed for
# B-point-only results - confirm against the result files.
results_per_sample_b_point_empkins_01 = results_b_point_empkins_01.per_sample.droplevel([0])
results_agg_total_b_point_empkins_01 = results_b_point_empkins_01.agg_total.droplevel([0])

results_per_sample_b_point_empkins_02 = results_b_point_empkins_02.per_sample.droplevel([0])
results_agg_total_b_point_empkins_02 = results_b_point_empkins_02.agg_total.droplevel([0])
# Preview of the rater 02 per-sample results.
results_per_sample_b_point_empkins_02.head()

#### Pipeline

In [None]:
# Results of the full pipelines (both algorithms) on EmpkinS, one result set per rater.
results_pipeline_empkins_01 = load_challenge_results_from_folder(
    result_path / "empkins_dataset_both_algorithms" / "rater_01",
    index_cols_per_sample=["participant", "condition", "phase"],
)
results_pipeline_empkins_02 = load_challenge_results_from_folder(
    result_path / "empkins_dataset_both_algorithms" / "rater_02",
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# Pipeline results keep all index levels; only shorter aliases are created here.
results_per_sample_pipeline_empkins_01 = results_pipeline_empkins_01.per_sample
results_agg_total_pipeline_empkins_01 = results_pipeline_empkins_01.agg_total

results_per_sample_pipeline_empkins_02 = results_pipeline_empkins_02.per_sample
results_agg_total_pipeline_empkins_02 = results_pipeline_empkins_02.agg_total
# Preview of the rater 02 per-sample results.
results_per_sample_pipeline_empkins_02.head()

In [None]:
# Collect the summary tables computed below, keyed by analysis name, one dict per dataset.
result_dict_empkins = {}
result_dict_guardian = {}

### Compute Metrics

#### Q-Peak

In [None]:
# Performance metrics of the Q-peak results, computed separately against each rater's labels.
# The aggregated results are passed as `num_heartbeats`.
metrics_q_peak_empkins_01 = compute_pep_performance_metrics(
    results_per_sample_q_peak_empkins_01, num_heartbeats=results_agg_total_q_peak_empkins_01
)
metrics_q_peak_empkins_02 = compute_pep_performance_metrics(
    results_per_sample_q_peak_empkins_02, num_heartbeats=results_agg_total_q_peak_empkins_02
)

In [None]:
# Combine the per-rater metric tables into one table (rater 01 first, rater 02 second).
metrics_q_peak_empkins = merge_result_metrics_from_multiple_annotators(
    [metrics_q_peak_empkins_01, metrics_q_peak_empkins_02]
)

In [None]:
# Replace internal algorithm and metric identifiers with their display names.
metrics_q_peak_empkins = metrics_q_peak_empkins.rename(index=rename_algorithms)
metrics_q_peak_empkins = metrics_q_peak_empkins.rename(columns=rename_metrics)
metrics_q_peak_empkins

In [None]:
# Descriptive statistics of the absolute MAE difference between both annotators,
# transposed so that the statistics become columns.
mean_annotator_difference_q_peak_empkins = (
    metrics_q_peak_empkins[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]]
    .abs()
    .describe()
    .transpose()
)
result_dict_empkins["Annotator_Difference_Q_Peak"] = mean_annotator_difference_q_peak_empkins
mean_annotator_difference_q_peak_empkins

In [None]:
# Color-code the MAE column of each annotator.
metrics_q_peak_empkins_style = metrics_q_peak_empkins.style.background_gradient(
    subset=[("Annotator 1", "Mean Absolute Error [ms]", "Mean"), ("Annotator 2", "Mean Absolute Error [ms]", "Mean")]
)
# One paragraph column for the algorithm index plus one siunitx "S" column per data column.
latex_output = convert_to_latex(
    metrics_q_peak_empkins_style,
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{3.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(metrics_q_peak_empkins_style.columns),
    caption=r"Error metrics of Q-peak extraction algorithms on the \textit{EmpkinS Dataset} for different annotators and MAE difference between both annotators. MAE = Mean Absolute Error, ME = Mean Error.",
    label="tab:q_peak_annotation_differences_empkins",
)
# Post-process the generated LaTeX: abbreviate metric names, shrink the font,
# use a readable index header, and let siunitx round to two decimals.
latex_output = (
    latex_output.replace(r"\bfseries Mean Absolute Error", r"\bfseries MAE")
    .replace(r"\bfseries Mean Error", r"\bfseries ME")
    .replace(r"\begin{table}[ht]", r"\begin{table}[ht]\small")
    .replace(r"q_peak_algorithm", r"\bfseries Q-peak Algorithm")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_q_peak_annotator_difference_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)
metrics_q_peak_empkins_style

#### B-Point

In [None]:
# Performance metrics of the B-point results, computed separately against each rater's labels.
# The aggregated results are passed as `num_heartbeats`.
metrics_b_point_empkins_01 = compute_pep_performance_metrics(
    results_per_sample_b_point_empkins_01, num_heartbeats=results_agg_total_b_point_empkins_01
)
metrics_b_point_empkins_02 = compute_pep_performance_metrics(
    results_per_sample_b_point_empkins_02, num_heartbeats=results_agg_total_b_point_empkins_02
)

In [None]:
# Combine the per-rater metric tables into one table (rater 01 first, rater 02 second).
metrics_b_point_empkins = merge_result_metrics_from_multiple_annotators(
    [metrics_b_point_empkins_01, metrics_b_point_empkins_02]
)

In [None]:
# Replace internal algorithm and metric identifiers with their display names.
metrics_b_point_empkins = metrics_b_point_empkins.rename(index=rename_algorithms)
metrics_b_point_empkins = metrics_b_point_empkins.rename(columns=rename_metrics)
metrics_b_point_empkins

In [None]:
# Display-only: descriptive statistics of the absolute MAE difference between annotators.
# NOTE(review): the next cell computes the same expression and stores it - this cell looks redundant.
metrics_b_point_empkins[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]].abs().describe().T

In [None]:
# Descriptive statistics of the absolute MAE difference between both annotators,
# transposed so that the statistics become columns.
mean_annotator_difference_b_point_empkins = (
    metrics_b_point_empkins[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]]
    .abs()
    .describe()
    .transpose()
)
result_dict_empkins["Annotator_Difference_B_Point"] = mean_annotator_difference_b_point_empkins
mean_annotator_difference_b_point_empkins

In [None]:
# Color-code the MAE column of each annotator.
metrics_b_point_empkins_style = metrics_b_point_empkins.style.background_gradient(
    subset=[("Annotator 1", "Mean Absolute Error [ms]", "Mean"), ("Annotator 2", "Mean Absolute Error [ms]", "Mean")]
)
# Two paragraph columns for the index plus one siunitx "S" column per data column.
# The column count is taken from the B-point table itself; it was previously read from the
# Q-peak styler, which only worked while both tables had the same number of columns.
latex_output = convert_to_latex(
    metrics_b_point_empkins_style,
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{1.5cm}p{1.5cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(metrics_b_point_empkins_style.columns),
    caption=r"Error metrics of B-point extraction algorithms on the \textit{EmpkinS Dataset} for different annotators and MAE difference between both annotators. MAE = Mean Absolute Error, ME = Mean Error.",
    label="tab:b_point_annotation_differences_empkins",
)
# Post-process the generated LaTeX: abbreviate metric names, shrink the font,
# use readable index headers, and let siunitx round to two decimals.
latex_output = (
    latex_output.replace(r"\bfseries Mean Absolute Error", r"\bfseries MAE")
    .replace(r"\bfseries Mean Error", r"\bfseries ME")
    .replace(r"\begin{table}[ht]", r"\begin{table}[ht]\small")
    .replace(r"b_point_algorithm", r"\bfseries B-point Algorithm")
    .replace(r"outlier_correction_algorithm", r"\bfseries Outlier Correction Algorithm")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_b_point_annotator_difference_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)
metrics_b_point_empkins_style

#### Pipeline

In [None]:
# Performance metrics of the pipeline results, computed separately against each rater's labels.
# The aggregated results are passed as `num_heartbeats`.
metrics_pipeline_empkins_01 = compute_pep_performance_metrics(
    results_per_sample_pipeline_empkins_01, num_heartbeats=results_agg_total_pipeline_empkins_01
)
metrics_pipeline_empkins_02 = compute_pep_performance_metrics(
    results_per_sample_pipeline_empkins_02, num_heartbeats=results_agg_total_pipeline_empkins_02
)

In [None]:
# Combine the per-rater metric tables (rater 01 first, rater 02 second) ...
metrics_pipeline_empkins = merge_result_metrics_from_multiple_annotators(
    [metrics_pipeline_empkins_01, metrics_pipeline_empkins_02]
)
# ... and replace internal algorithm and metric identifiers with their display names.
metrics_pipeline_empkins = metrics_pipeline_empkins.rename(index=rename_algorithms)
metrics_pipeline_empkins = metrics_pipeline_empkins.rename(columns=rename_metrics)
metrics_pipeline_empkins

In [None]:
# Descriptive statistics of the absolute MAE difference between both annotators,
# transposed so that the statistics become columns.
mean_annotator_difference_pipeline_empkins = (
    metrics_pipeline_empkins[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]]
    .abs()
    .describe()
    .transpose()
)
result_dict_empkins["Annotator_Difference_Pipeline"] = mean_annotator_difference_pipeline_empkins
mean_annotator_difference_pipeline_empkins

In [None]:
# Color-code the MAE column of each annotator and format values with two decimals.
metrics_pipeline_empkins_style = metrics_pipeline_empkins.style.background_gradient(
    subset=[("Annotator 1", "Mean Absolute Error [ms]", "Mean"), ("Annotator 2", "Mean Absolute Error [ms]", "Mean")]
).format(precision=2)
# Three paragraph columns for the index plus one siunitx "S" column per data column;
# a longtable is requested for this table.
latex_output = convert_to_latex(
    metrics_pipeline_empkins_style,
    collapse_index_columns=False,
    column_header_bold=True,
    environment="longtable",
    column_format="p{1.15cm}p{1.25cm}p{1.25cm}"
    + "S[table-column-width=0.75cm,table-format=1.1(2)]" * len(metrics_pipeline_empkins_style.columns),
    caption=r"Error metrics of PEP extraction pipelines on the \textit{EmpkinS Dataset} for different annotators and MAE difference between both annotators. MAE = Mean Absolute Error, ME = Mean Error.",
    label="tab:pipeline_annotation_differences_empkins",
)
# Post-process the generated LaTeX: wrap the longtable in a small-font group, normalize the
# bold markup, abbreviate metric names, and use readable headers.
latex_output = (
    latex_output.replace(r"\begin{longtable}", r"\begin{small}\begin{longtable}")
    .replace(r"\end{longtable}", r"\end{longtable}\end{small}")
    .replace(r"\font-weightbold ", r"\bfseries ")
    .replace(r"\bfseries Mean Absolute Error", r"\bfseries MAE")
    .replace(r"\bfseries Mean Error", r"\bfseries ME")
    .replace(r"q_peak_algorithm", r"\bfseries Q-peak Algorithm")
    .replace(r"b_point_algorithm", r"\bfseries B-point Algorithm")
    .replace(r"outlier_correction_algorithm", r"\bfseries Outlier Correction Algorithm")
    .replace(r"Annotator Difference", r"\bfseries Ann. Diff.")
)
# Wrap "Van13 (<n> ms)" multirow cells in a 1.25cm parbox.
latex_output = re.sub(
    r"(\\multirow\[c\]\{\d+\}\{\*\})\{(Van13\s*\(\d+\s*ms\))\}", r"\1{\\parbox{1.25cm}{\2}}", latex_output
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_annotator_difference_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)
metrics_pipeline_empkins_style

#### Comparison between different Agreement Levels

In [None]:
# Load the annotations from both rater-specific datasets.
annotations_empkins = load_annotations_from_dataset(dataset_empkins_01, dataset_empkins_02)

# Split by the "signal" index level into ECG and ICG annotations.
annotations_ecg_empkins = annotations_empkins.xs("ECG", level="signal")
annotations_icg_empkins = annotations_empkins.xs("ICG", level="signal")

In [None]:
# Q-peak algorithm used for the agreement-level comparison (labelled "Mar04" in the table caption).
selected_q_peak_algorithm = "martinez2004"

In [None]:
# Per-sample results of the selected Q-peak algorithm, merged across both raters.
results_q_peak_empkins = merge_results_per_sample_from_different_annotators(
    [results_per_sample_q_peak_empkins_01, results_per_sample_q_peak_empkins_02],
    selected_algorithm=selected_q_peak_algorithm,
)
results_q_peak_empkins

In [None]:
# B-point algorithm used for the agreement-level comparison.
# NOTE(review): presumably a (B-point algorithm, outlier correction) pair - confirm.
selected_b_point_algorithm = ("drost2022", "none")

In [None]:
# Per-sample results of the selected B-point algorithm, merged across both raters.
results_b_point_empkins = merge_results_per_sample_from_different_annotators(
    [results_per_sample_b_point_empkins_01, results_per_sample_b_point_empkins_02],
    selected_algorithm=selected_b_point_algorithm,
)
results_b_point_empkins

In [None]:
# Pipeline used for the agreement-level comparison.
# NOTE(review): presumably (Q-peak algorithm, B-point algorithm, outlier correction) - confirm.
selected_pipeline = ("forouzanfar2018", "drost2022", "none")

In [None]:
# Per-sample results of the selected pipeline, merged across both raters.
results_pipeline_empkins = merge_results_per_sample_from_different_annotators(
    [results_per_sample_pipeline_empkins_01, results_per_sample_pipeline_empkins_02],
    selected_algorithm=selected_pipeline,
)
results_pipeline_empkins

In [None]:
# Attach the annotation agreement (from the ECG annotations) to the Q-peak results,
# then summarize each agreement bin by mean and standard deviation.
results_q_peak_agreement_bins_empkins = (
    add_annotation_agreement_to_results_dataframe(
        results_q_peak_empkins, annotations_ecg_empkins, dataset_empkins_01.sampling_rate_ecg
    )
    .groupby("agreement_bins", observed=True)
    .agg(["mean", "std"])
)
result_dict_empkins["Annotation_Agreement_Q_Peak"] = results_q_peak_agreement_bins_empkins

results_q_peak_agreement_bins_empkins

In [None]:
# Export the per-bin summary with column levels 1 and 2 dropped; one paragraph column for
# the bin label plus one siunitx "S" column per data column.
# The caption now ends with a period, like the captions of the sibling agreement tables.
latex_output = convert_to_latex(
    results_q_peak_agreement_bins_empkins.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(results_q_peak_agreement_bins_empkins.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected Q-peak extraction algorithm (Mar04) on the \textit{EmpkinS Dataset}. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:q_peak_annotation_agreement_empkins",
)

# Readable headers and two-decimal rounding via siunitx.
latex_output = (
    latex_output.replace("agreement_bins", "Agreement Bins")
    .replace(r"\bfseries mean", r"\bfseries Mean")
    .replace(r"\bfseries std", r"\bfseries SD")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_q_peak_annotation_agreement_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

In [None]:
# Attach the annotation agreement (from the ICG annotations) to the B-point results,
# then summarize each agreement bin by mean and standard deviation.
results_b_point_agreement_bins_empkins = (
    add_annotation_agreement_to_results_dataframe(
        results_b_point_empkins, annotations_icg_empkins, dataset_empkins_01.sampling_rate_icg
    )
    .groupby("agreement_bins", observed=True)
    .agg(["mean", "std"])
)
result_dict_empkins["Annotation_Agreement_B_Point"] = results_b_point_agreement_bins_empkins

results_b_point_agreement_bins_empkins

In [None]:
# Export the per-bin summary with column levels 1 and 2 dropped; one paragraph column for
# the bin label plus one siunitx "S" column per data column.
# The column count is taken from the B-point frame being exported; it was previously read
# from the Q-peak frame (copy-paste), which only worked while both had the same width.
latex_output = convert_to_latex(
    results_b_point_agreement_bins_empkins.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(results_b_point_agreement_bins_empkins.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected B-point extraction algorithm (Dro22) on the \textit{EmpkinS Dataset}. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:b_point_annotation_agreement_empkins",
)

# Readable headers and two-decimal rounding via siunitx.
latex_output = (
    latex_output.replace("agreement_bins", "Agreement Bins")
    .replace(r"\bfseries mean", r"\bfseries Mean")
    .replace(r"\bfseries std", r"\bfseries SD")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_b_point_annotation_agreement_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

In [None]:
# Attach the Q-peak annotation agreement (from the ECG annotations) to the pipeline results.
# The ECG sampling rate is used here because the agreement is derived from ECG annotations;
# the ICG sampling rate was passed before, inconsistent with the Q-peak cell above.
results_pipeline_q_peak_agreement_bins_empkins = add_annotation_agreement_to_results_dataframe(
    results_pipeline_empkins, annotations_ecg_empkins, dataset_empkins_01.sampling_rate_ecg
)
# Summarize each agreement bin by mean and standard deviation.
results_pipeline_q_peak_agreement_bins_empkins = results_pipeline_q_peak_agreement_bins_empkins.groupby(
    "agreement_bins", observed=True
).agg(["mean", "std"])
result_dict_empkins["Annotation_Agreement_Pipeline_Q_Peak"] = results_pipeline_q_peak_agreement_bins_empkins

results_pipeline_q_peak_agreement_bins_empkins

In [None]:
# Export the per-bin summary with column levels 1 and 2 dropped; one paragraph column for
# the bin label plus one siunitx "S" column per data column.
# The column count is taken from the pipeline frame being exported; it was previously read
# from the Q-peak-only frame (copy-paste), which only worked while both had the same width.
latex_output = convert_to_latex(
    results_pipeline_q_peak_agreement_bins_empkins.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]"
    * len(results_pipeline_q_peak_agreement_bins_empkins.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected PEP pipeline [For18, Dro22, None] on the \textit{EmpkinS Dataset}, using Q-peak annotations for agreement computation. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:pipeline_q_peak_annotation_agreement_empkins",
)

# Readable headers and two-decimal rounding via siunitx.
latex_output = (
    latex_output.replace("agreement_bins", "Agreement Bins")
    .replace(r"\bfseries mean", r"\bfseries Mean")
    .replace(r"\bfseries std", r"\bfseries SD")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_q_peak_annotation_agreement_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

In [None]:
# Attach the B-point annotation agreement (from the ICG annotations) to the pipeline results,
# then summarize each agreement bin by mean and standard deviation.
results_pipeline_b_point_agreement_bins_empkins = (
    add_annotation_agreement_to_results_dataframe(
        results_pipeline_empkins, annotations_icg_empkins, dataset_empkins_01.sampling_rate_icg
    )
    .groupby("agreement_bins", observed=True)
    .agg(["mean", "std"])
)
result_dict_empkins["Annotation_Agreement_Pipeline_B_Point"] = results_pipeline_b_point_agreement_bins_empkins

results_pipeline_b_point_agreement_bins_empkins

In [None]:
# Export the per-bin summary with column levels 1 and 2 dropped; one paragraph column for
# the bin label plus one siunitx "S" column per data column.
# The column count is taken from the pipeline frame being exported; it was previously read
# from the Q-peak-only frame (copy-paste), which only worked while both had the same width.
latex_output = convert_to_latex(
    results_pipeline_b_point_agreement_bins_empkins.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]"
    * len(results_pipeline_b_point_agreement_bins_empkins.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected PEP pipeline [For18, Dro22, None] on the \textit{EmpkinS Dataset}, using B-point annotations for agreement computation. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:pipeline_b_point_annotation_agreement_empkins",
)

# Readable headers and two-decimal rounding via siunitx.
latex_output = (
    latex_output.replace("agreement_bins", "Agreement Bins")
    .replace(r"\bfseries mean", r"\bfseries Mean")
    .replace(r"\bfseries std", r"\bfseries SD")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_b_point_annotation_agreement_empkins.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

## Guardian Dataset

In [None]:
# Guardian dataset restricted to labeled data, using the labels of rater 01.
dataset_guardian_01 = GuardianDataset(guardian_base_path, use_cache=True, only_labeled=True, label_type="rater_01")
dataset_guardian_01

In [None]:
# Same dataset, but using the labels of rater 02.
dataset_guardian_02 = GuardianDataset(guardian_base_path, use_cache=True, only_labeled=True, label_type="rater_02")
dataset_guardian_02

### Load Data

#### Q-Peak

In [None]:
# Q-peak challenge results on Guardian, evaluated against rater 01.
results_q_peak_guardian_01 = load_challenge_results_from_folder(
    result_path / "guardian_dataset_q_peak" / "rater_01",
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# Q-peak challenge results on Guardian, evaluated against rater 02.
results_q_peak_guardian_02 = load_challenge_results_from_folder(
    result_path / "guardian_dataset_q_peak" / "rater_02",
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# Drop index levels 1 and 2 from the per-sample and aggregated Q-peak results of both raters.
# NOTE(review): presumably these are the B-point and outlier-correction algorithm levels,
# which carry no information for Q-peak-only results - confirm against the result files.
results_per_sample_q_peak_guardian_01 = results_q_peak_guardian_01.per_sample.droplevel([1, 2])
results_agg_total_q_peak_guardian_01 = results_q_peak_guardian_01.agg_total.droplevel([1, 2])

results_per_sample_q_peak_guardian_02 = results_q_peak_guardian_02.per_sample.droplevel([1, 2])
results_agg_total_q_peak_guardian_02 = results_q_peak_guardian_02.agg_total.droplevel([1, 2])

# Preview of the rater 02 per-sample results.
results_per_sample_q_peak_guardian_02.head()

#### B-Point

In [None]:
# B-point challenge results on Guardian, evaluated against rater 01.
results_b_point_guardian_01 = load_challenge_results_from_folder(
    result_path / "guardian_dataset_b_point" / "rater_01",
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# B-point challenge results on Guardian, evaluated against rater 02.
results_b_point_guardian_02 = load_challenge_results_from_folder(
    result_path / "guardian_dataset_b_point" / "rater_02",
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# Drop index level 0 from the per-sample and aggregated B-point results of both raters.
# NOTE(review): presumably this is the Q-peak algorithm level, which is not needed for
# B-point-only results - confirm against the result files.
results_per_sample_b_point_guardian_01 = results_b_point_guardian_01.per_sample.droplevel([0])
results_agg_total_b_point_guardian_01 = results_b_point_guardian_01.agg_total.droplevel([0])

results_per_sample_b_point_guardian_02 = results_b_point_guardian_02.per_sample.droplevel([0])
results_agg_total_b_point_guardian_02 = results_b_point_guardian_02.agg_total.droplevel([0])
# Preview of the rater 02 per-sample results.
results_per_sample_b_point_guardian_02.head()

#### Pipeline

In [None]:
# Results of the full pipelines (both algorithms) on Guardian, one result set per rater.
results_pipeline_guardian_01 = load_challenge_results_from_folder(
    result_path / "guardian_dataset_both_algorithms" / "rater_01",
    index_cols_per_sample=["participant", "phase"],
)
results_pipeline_guardian_02 = load_challenge_results_from_folder(
    result_path / "guardian_dataset_both_algorithms" / "rater_02",
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# Pipeline results keep all index levels; only shorter aliases are created here.
results_per_sample_pipeline_guardian_01 = results_pipeline_guardian_01.per_sample
results_agg_total_pipeline_guardian_01 = results_pipeline_guardian_01.agg_total

results_per_sample_pipeline_guardian_02 = results_pipeline_guardian_02.per_sample
results_agg_total_pipeline_guardian_02 = results_pipeline_guardian_02.agg_total
# Preview of the rater 02 per-sample results.
results_per_sample_pipeline_guardian_02.head()

### Compute Metrics

#### Q-Peak

In [None]:
# Performance metrics of the Q-peak results, computed separately against each rater's labels.
# The aggregated results are passed as `num_heartbeats`.
metrics_q_peak_guardian_01 = compute_pep_performance_metrics(
    results_per_sample_q_peak_guardian_01, num_heartbeats=results_agg_total_q_peak_guardian_01
)
metrics_q_peak_guardian_02 = compute_pep_performance_metrics(
    results_per_sample_q_peak_guardian_02, num_heartbeats=results_agg_total_q_peak_guardian_02
)

In [None]:
# Combine the per-rater metric tables (rater 01 first, rater 02 second) ...
metrics_q_peak_guardian = merge_result_metrics_from_multiple_annotators(
    [metrics_q_peak_guardian_01, metrics_q_peak_guardian_02]
)
# ... and replace internal algorithm and metric identifiers with their display names.
metrics_q_peak_guardian = metrics_q_peak_guardian.rename(index=rename_algorithms)
metrics_q_peak_guardian = metrics_q_peak_guardian.rename(columns=rename_metrics)
metrics_q_peak_guardian

In [None]:
# Descriptive statistics of the absolute MAE difference between both annotators,
# transposed so that the statistics become columns.
mean_annotator_difference_q_peak_guardian = (
    metrics_q_peak_guardian[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]]
    .abs()
    .describe()
    .transpose()
)
result_dict_guardian["Annotator_Difference_Q_Peak"] = mean_annotator_difference_q_peak_guardian
mean_annotator_difference_q_peak_guardian

In [None]:
# Color-code the MAE column of each annotator.
metrics_q_peak_guardian_style = metrics_q_peak_guardian.style.background_gradient(
    subset=[("Annotator 1", "Mean Absolute Error [ms]", "Mean"), ("Annotator 2", "Mean Absolute Error [ms]", "Mean")]
)
# One paragraph column for the algorithm index plus one siunitx "S" column per data column.
latex_output = convert_to_latex(
    metrics_q_peak_guardian_style,
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{3.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(metrics_q_peak_guardian_style.columns),
    caption=r"Error metrics of Q-peak extraction algorithms on the \textit{Guardian Dataset} for different annotators and MAE difference between both annotators. MAE = Mean Absolute Error, ME = Mean Error.",
    label="tab:q_peak_annotation_differences_guardian",
)
# Post-process the generated LaTeX: abbreviate metric names, shrink the font,
# use a readable index header, and let siunitx round to two decimals.
latex_output = (
    latex_output.replace(r"\bfseries Mean Absolute Error", r"\bfseries MAE")
    .replace(r"\bfseries Mean Error", r"\bfseries ME")
    .replace(r"\begin{table}[ht]", r"\begin{table}[ht]\small")
    .replace(r"q_peak_algorithm", r"\bfseries Q-peak Algorithm")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_q_peak_annotator_difference_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)
metrics_q_peak_guardian_style

#### B-Point

In [None]:
# Performance metrics of the B-point results, computed separately against each rater's labels.
# The aggregated results are passed as `num_heartbeats`.
metrics_b_point_guardian_01 = compute_pep_performance_metrics(
    results_per_sample_b_point_guardian_01, num_heartbeats=results_agg_total_b_point_guardian_01
)
metrics_b_point_guardian_02 = compute_pep_performance_metrics(
    results_per_sample_b_point_guardian_02, num_heartbeats=results_agg_total_b_point_guardian_02
)

In [None]:
# Combine the per-rater metric tables (rater 01 first, rater 02 second) ...
metrics_b_point_guardian = merge_result_metrics_from_multiple_annotators(
    [metrics_b_point_guardian_01, metrics_b_point_guardian_02]
)
# ... and replace internal algorithm and metric identifiers with their display names.
metrics_b_point_guardian = metrics_b_point_guardian.rename(index=rename_algorithms)
metrics_b_point_guardian = metrics_b_point_guardian.rename(columns=rename_metrics)
metrics_b_point_guardian

In [None]:
# Descriptive statistics of the absolute MAE difference between both annotators,
# transposed so that the statistics become columns.
mean_annotator_difference_b_point_guardian = (
    metrics_b_point_guardian[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]]
    .abs()
    .describe()
    .transpose()
)
result_dict_guardian["Annotator_Difference_B_Point"] = mean_annotator_difference_b_point_guardian
mean_annotator_difference_b_point_guardian

In [None]:
# Color-code the MAE column of each annotator.
metrics_b_point_guardian_style = metrics_b_point_guardian.style.background_gradient(
    subset=[("Annotator 1", "Mean Absolute Error [ms]", "Mean"), ("Annotator 2", "Mean Absolute Error [ms]", "Mean")]
)
# Two paragraph columns for the index plus one siunitx "S" column per data column.
# The column count is taken from the B-point table itself; it was previously read from the
# Q-peak styler, which only worked while both tables had the same number of columns.
latex_output = convert_to_latex(
    metrics_b_point_guardian_style,
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{1.5cm}p{1.5cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(metrics_b_point_guardian_style.columns),
    caption=r"Error metrics of B-point extraction algorithms on the \textit{Guardian Dataset} for different annotators and MAE difference between both annotators. MAE = Mean Absolute Error, ME = Mean Error.",
    label="tab:b_point_annotation_differences_guardian",
)
# Post-process the generated LaTeX: abbreviate metric names, shrink the font,
# use readable index headers, and let siunitx round to two decimals.
latex_output = (
    latex_output.replace(r"\bfseries Mean Absolute Error", r"\bfseries MAE")
    .replace(r"\bfseries Mean Error", r"\bfseries ME")
    .replace(r"\begin{table}[ht]", r"\begin{table}[ht]\small")
    .replace(r"b_point_algorithm", r"\bfseries B-point Algorithm")
    .replace(r"outlier_correction_algorithm", r"\bfseries Outlier Correction Algorithm")
    .replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_b_point_annotator_difference_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)
metrics_b_point_guardian_style

#### Pipeline

In [None]:
# Performance metrics of the pipeline results, computed separately against each rater's labels.
# The aggregated results are passed as `num_heartbeats`.
metrics_pipeline_guardian_01 = compute_pep_performance_metrics(
    results_per_sample_pipeline_guardian_01, num_heartbeats=results_agg_total_pipeline_guardian_01
)
metrics_pipeline_guardian_02 = compute_pep_performance_metrics(
    results_per_sample_pipeline_guardian_02, num_heartbeats=results_agg_total_pipeline_guardian_02
)

In [None]:
# Combine the per-rater metric tables (rater 01 first, rater 02 second) ...
metrics_pipeline_guardian = merge_result_metrics_from_multiple_annotators(
    [metrics_pipeline_guardian_01, metrics_pipeline_guardian_02]
)
# ... and replace internal algorithm and metric identifiers with their display names.
metrics_pipeline_guardian = metrics_pipeline_guardian.rename(index=rename_algorithms)
metrics_pipeline_guardian = metrics_pipeline_guardian.rename(columns=rename_metrics)
metrics_pipeline_guardian

In [None]:
# Descriptive statistics of the absolute MAE difference between both annotators,
# transposed so that the statistics become columns.
mean_annotator_difference_pipeline_guardian = (
    metrics_pipeline_guardian[[("Annotator Difference", "Mean Absolute Error [ms]", "Mean")]]
    .abs()
    .describe()
    .transpose()
)
result_dict_guardian["Annotator_Difference_Pipeline"] = mean_annotator_difference_pipeline_guardian
mean_annotator_difference_pipeline_guardian

In [None]:
# Color-code the MAE column of each annotator and format values with two decimals.
metrics_pipeline_guardian_style = metrics_pipeline_guardian.style.background_gradient(
    subset=[("Annotator 1", "Mean Absolute Error [ms]", "Mean"), ("Annotator 2", "Mean Absolute Error [ms]", "Mean")]
).format(precision=2)
# Three paragraph columns for the index plus one siunitx "S" column per data column;
# a longtable is requested for this table.
latex_output = convert_to_latex(
    metrics_pipeline_guardian_style,
    collapse_index_columns=False,
    column_header_bold=True,
    environment="longtable",
    column_format="p{1.25cm}p{1.25cm}p{1.25cm}"
    + "S[table-column-width=0.75cm,table-format=1.1(2)]" * len(metrics_pipeline_guardian_style.columns),
    caption=r"Error metrics of PEP extraction pipelines on the \textit{Guardian Dataset} for different annotators and MAE difference between both annotators. MAE = Mean Absolute Error, ME = Mean Error.",
    label="tab:pipeline_annotation_differences_guardian",
)
# Post-process the generated LaTeX: wrap the longtable in a small-font group, normalize the
# bold markup, abbreviate metric names, and use readable headers.
latex_output = (
    latex_output.replace(r"\begin{longtable}", r"\begin{small}\begin{longtable}")
    .replace(r"\end{longtable}", r"\end{longtable}\end{small}")
    .replace(r"\font-weightbold ", r"\bfseries ")
    .replace(r"\bfseries Mean Absolute Error", r"\bfseries MAE")
    .replace(r"\bfseries Mean Error", r"\bfseries ME")
    .replace(r"q_peak_algorithm", r"\bfseries Q-peak Algorithm")
    .replace(r"b_point_algorithm", r"\bfseries B-point Algorithm")
    .replace(r"outlier_correction_algorithm", r"\bfseries Outlier Correction Algorithm")
    .replace(r"Annotator Difference", r"\bfseries Ann. Diff.")
)
# Wrap "Van13 (<n> ms)" multirow cells in a 1.25cm parbox.
latex_output = re.sub(
    r"(\\multirow\[c\]\{\d+\}\{\*\})\{(Van13\s*\(\d+\s*ms\))\}", r"\1{\\parbox{1.25cm}{\2}}", latex_output
)

# write_text() closes the file after writing; the previous open(...).write(...) never
# closed the handle. The encoding is stated explicitly so the output is platform-independent.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_annotator_difference_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)
metrics_pipeline_guardian_style

#### Comparison between different Agreement Levels

In [None]:
# Q-peak algorithm used for the agreement-level comparison (labelled "Mar04" in the table caption).
selected_q_peak_algorithm = "martinez2004"

In [None]:
# Per-sample results of the selected Q-peak algorithm, merged across both raters.
results_q_peak_guardian = merge_results_per_sample_from_different_annotators(
    [results_per_sample_q_peak_guardian_01, results_per_sample_q_peak_guardian_02],
    selected_algorithm=selected_q_peak_algorithm,
)
results_q_peak_guardian

In [None]:
# B-point algorithm used for the agreement-level comparison.
# NOTE(review): presumably a (B-point algorithm, outlier correction) pair - confirm.
selected_b_point_algorithm = ("drost2022", "none")

In [None]:
# Per-sample results of the selected B-point algorithm, merged across both raters.
results_b_point_guardian = merge_results_per_sample_from_different_annotators(
    [results_per_sample_b_point_guardian_01, results_per_sample_b_point_guardian_02],
    selected_algorithm=selected_b_point_algorithm,
)
results_b_point_guardian

In [None]:
# Pipeline used for the agreement-level comparison.
# NOTE(review): presumably (Q-peak algorithm, B-point algorithm, outlier correction) - confirm.
selected_pipeline = ("forouzanfar2018", "drost2022", "none")

In [None]:
# Merge the per-sample pipeline results of both annotator runs for the selected pipeline.
pipeline_results_by_annotator_guardian = [
    results_per_sample_pipeline_guardian_01,
    results_per_sample_pipeline_guardian_02,
]
results_pipeline_guardian = merge_results_per_sample_from_different_annotators(
    pipeline_results_by_annotator_guardian, selected_algorithm=selected_pipeline
)
results_pipeline_guardian

In [None]:
# Load the annotations of both Guardian dataset instances and split them by the "signal" index level.
annotations_guardian = load_annotations_from_dataset(dataset_guardian_01, dataset_guardian_02)

annotations_ecg_guardian, annotations_icg_guardian = (
    annotations_guardian.xs(signal_name, level="signal") for signal_name in ("ECG", "ICG")
)

In [None]:
# Add the annotation agreement (based on the ECG annotations) to the Q-peak results and
# summarize every agreement bin by its mean and standard deviation.
results_q_peak_agreement_bins_guardian = (
    add_annotation_agreement_to_results_dataframe(
        results_q_peak_guardian, annotations_ecg_guardian, dataset_guardian_01.sampling_rate_ecg
    )
    .groupby("agreement_bins", observed=True)
    .agg(["mean", "std"])
)
result_dict_guardian["Annotation_Agreement_Q_Peak"] = results_q_peak_agreement_bins_guardian

results_q_peak_agreement_bins_guardian

In [None]:
# Render the per-agreement-bin Q-peak statistics as a LaTeX table.
# One "S" column is emitted per data column; dropping column levels 1 and 2 does not change the
# number of columns, so the count is taken from the un-flattened frame.
latex_output = convert_to_latex(
    results_q_peak_agreement_bins_guardian.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(results_q_peak_agreement_bins_guardian.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected Q-peak extraction algorithm (Mar04) on the \textit{Guardian Dataset}. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms",
    label="tab:q_peak_annotation_agreement_guardian",
)

# Replace raw index/aggregation names with display labels and make siunitx round to two decimals.
latex_output = latex_output.replace("agreement_bins", "Agreement Bins")
latex_output = latex_output.replace(r"\bfseries mean", r"\bfseries Mean")
latex_output = latex_output.replace(r"\bfseries std", r"\bfseries SD")
latex_output = latex_output.replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")

# write_text() opens, writes and closes the file in a single call, so no file handle is left open.
suppl_tab_path_paper.joinpath("tab_q_peak_annotation_agreement_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

In [None]:
# Add the annotation agreement (based on the ICG annotations) to the B-point results and
# summarize every agreement bin by its mean and standard deviation.
results_b_point_agreement_bins_guardian = (
    add_annotation_agreement_to_results_dataframe(
        results_b_point_guardian, annotations_icg_guardian, dataset_guardian_01.sampling_rate_icg
    )
    .groupby("agreement_bins", observed=True)
    .agg(["mean", "std"])
)
result_dict_guardian["Annotation_Agreement_B_Point"] = results_b_point_agreement_bins_guardian

results_b_point_agreement_bins_guardian

In [None]:
# Render the per-agreement-bin B-point statistics as a LaTeX table.
# One "S" column is emitted per data column of the B-point frame itself (not of the Q-peak frame),
# so the column specification always matches the table that is actually rendered.
latex_output = convert_to_latex(
    results_b_point_agreement_bins_guardian.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]" * len(results_b_point_agreement_bins_guardian.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected B-point extraction algorithm (Dro22) on the \textit{Guardian Dataset}. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:b_point_annotation_agreement_guardian",
)

# Replace raw index/aggregation names with display labels and make siunitx round to two decimals.
latex_output = latex_output.replace("agreement_bins", "Agreement Bins")
latex_output = latex_output.replace(r"\bfseries mean", r"\bfseries Mean")
latex_output = latex_output.replace(r"\bfseries std", r"\bfseries SD")
latex_output = latex_output.replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")

# write_text() opens, writes and closes the file in a single call, so no file handle is left open.
suppl_tab_path_paper.joinpath("tab_b_point_annotation_agreement_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

In [None]:
# Add the annotation agreement based on the ECG (Q-peak) annotations to the pipeline results and
# summarize every agreement bin by its mean and standard deviation.
# The agreement is computed from ECG annotations, so the ECG sampling rate is passed
# (same pairing as in the Q-peak agreement cell).
results_pipeline_q_peak_agreement_bins_guardian = add_annotation_agreement_to_results_dataframe(
    results_pipeline_guardian, annotations_ecg_guardian, dataset_guardian_01.sampling_rate_ecg
)
results_pipeline_q_peak_agreement_bins_guardian = results_pipeline_q_peak_agreement_bins_guardian.groupby(
    "agreement_bins", observed=True
).agg(["mean", "std"])
result_dict_guardian["Annotation_Agreement_Pipeline_Q_Peak"] = results_pipeline_q_peak_agreement_bins_guardian

results_pipeline_q_peak_agreement_bins_guardian

In [None]:
# Render the per-agreement-bin pipeline statistics (agreement from Q-peak annotations) as a LaTeX table.
# One "S" column is emitted per data column of the pipeline frame itself (not of the Q-peak
# algorithm frame), so the column specification always matches the table that is actually rendered.
latex_output = convert_to_latex(
    results_pipeline_q_peak_agreement_bins_guardian.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]"
    * len(results_pipeline_q_peak_agreement_bins_guardian.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected PEP pipeline [For18, Dro22, None] on the \textit{Guardian Dataset}, using Q-peak annotations for agreement computation. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:pipeline_q_peak_annotation_agreement_guardian",
)

# Replace raw index/aggregation names with display labels and make siunitx round to two decimals.
latex_output = latex_output.replace("agreement_bins", "Agreement Bins")
latex_output = latex_output.replace(r"\bfseries mean", r"\bfseries Mean")
latex_output = latex_output.replace(r"\bfseries std", r"\bfseries SD")
latex_output = latex_output.replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")

# write_text() opens, writes and closes the file in a single call, so no file handle is left open.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_q_peak_annotation_agreement_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

In [None]:
# Add the annotation agreement (based on the ICG annotations) to the pipeline results and
# summarize every agreement bin by its mean and standard deviation.
results_pipeline_b_point_agreement_bins_guardian = (
    add_annotation_agreement_to_results_dataframe(
        results_pipeline_guardian, annotations_icg_guardian, dataset_guardian_01.sampling_rate_icg
    )
    .groupby("agreement_bins", observed=True)
    .agg(["mean", "std"])
)
result_dict_guardian["Annotation_Agreement_Pipeline_B_Point"] = results_pipeline_b_point_agreement_bins_guardian

results_pipeline_b_point_agreement_bins_guardian

In [None]:
# Render the per-agreement-bin pipeline statistics (agreement from B-point annotations) as a LaTeX table.
# One "S" column is emitted per data column of the pipeline frame itself (not of the Q-peak
# algorithm frame), so the column specification always matches the table that is actually rendered.
latex_output = convert_to_latex(
    results_pipeline_b_point_agreement_bins_guardian.droplevel([1, 2], axis=1),
    collapse_index_columns=False,
    column_header_bold=True,
    column_format="p{2.0cm}"
    + "S[table-column-width=0.75cm,table-format=2.2]"
    * len(results_pipeline_b_point_agreement_bins_guardian.columns),
    caption=r"Effect of annotation agreement on the absolute error (AE) of selected PEP pipeline [For18, Dro22, None] on the \textit{Guardian Dataset}, using B-point annotations for agreement computation. Annotation agreements: \textit{high}: [0\,ms, 4\,ms], \textit{medium}: [5\,ms, 10\,ms], \textit{low}: $\geq$ 11\,ms.",
    label="tab:pipeline_b_point_annotation_agreement_guardian",
)

# Replace raw index/aggregation names with display labels and make siunitx round to two decimals.
latex_output = latex_output.replace("agreement_bins", "Agreement Bins")
latex_output = latex_output.replace(r"\bfseries mean", r"\bfseries Mean")
latex_output = latex_output.replace(r"\bfseries std", r"\bfseries SD")
latex_output = latex_output.replace(r"\sisetup{", r"\sisetup{round-mode=places,round-precision=2,")

# write_text() opens, writes and closes the file in a single call, so no file handle is left open.
suppl_tab_path_paper.joinpath("tab_pep_pipeline_b_point_annotation_agreement_guardian.tex").write_text(
    latex_output, encoding="utf-8"
)

print(latex_output)

## Combined

### Comparison between different Agreement Levels

In [None]:
# Stack the Q-peak agreement statistics of both datasets, keyed by dataset name on the row index.
# NOTE(review): the Guardian frame was already reduced to mean/std per agreement bin in an earlier
# cell, so this second groupby/agg works on single-row groups. The extra "std" columns it creates
# are presumably all-NaN and removed by dropna(axis=1) - confirm this is the intended way to
# combine the frames.
results_q_peak_agreement = (
    pd.concat(
        {
            dataset_name: agreement_frame.groupby("agreement_bins", observed=True).agg(["mean", "std"])
            for dataset_name, agreement_frame in (
                ("EmpkinS", results_q_peak_agreement_bins_empkins),
                ("Guardian", results_q_peak_agreement_bins_guardian),
            )
        },
        axis=0,
    )
    .dropna(axis=1)
    .droplevel([1, 2, -1], axis=1)
)
results_q_peak_agreement

In [None]:
# Stack the B-point agreement statistics of both datasets, keyed by dataset name on the row index.
# NOTE(review): the Guardian frame was already reduced to mean/std per agreement bin in an earlier
# cell, so this second groupby/agg works on single-row groups. The extra "std" columns it creates
# are presumably all-NaN and removed by dropna(axis=1) - confirm this is the intended way to
# combine the frames.
results_b_point_agreement = (
    pd.concat(
        {
            dataset_name: agreement_frame.groupby("agreement_bins", observed=True).agg(["mean", "std"])
            for dataset_name, agreement_frame in (
                ("EmpkinS", results_b_point_agreement_bins_empkins),
                ("Guardian", results_b_point_agreement_bins_guardian),
            )
        },
        axis=0,
    )
    .dropna(axis=1)
    .droplevel([1, 2, -1], axis=1)
)
results_b_point_agreement