# Analysis B-Point Algorithms - EmpkinS & Guardian Combined

## Setup and Helper Functions

### Imports

In [None]:
import json
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from biopsykit.utils.dataframe_handling import multi_xs
from fau_colors import cmaps, register_fausans_font

from pepbench.datasets import EmpkinsDataset, GuardianDataset
from pepbench.export import convert_to_latex, create_nan_reason_table
from pepbench.io import load_challenge_results_from_folder
from pepbench.plotting.results import (
    boxplot_algorithm_performance,
)
from pepbench.utils import get_nan_reason_mapping

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Register the FAU Sans font with matplotlib and close any figures left over
# from a previous run of the notebook.
register_fausans_font()
plt.close("all")

# Default seaborn palette, built from `cmaps.faculties_light`.
palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Global matplotlib defaults, applied in one go (same keys and order as before).
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        "font.sans-serif": "FAUSans Office",
    }
)

# Trailing expression: lets the notebook render the palette as cell output.
palette

In [None]:
# Repository root, two directory levels above the notebook's working directory.
root_path = Path("..", "..")

In [None]:
# Name of the top-level section of ``config.json`` that is read below.
deploy_type = "local"

# `read_text` opens, reads and closes the file in one call; the previous
# `json.load(path.open(...))` form never closed the file handle.
config_dict = json.loads(root_path.joinpath("config.json").read_text(encoding="utf-8"))

# Base folders of the two datasets, as configured for the selected deployment.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(empkins_base_path, guardian_base_path)

### Input Paths

In [None]:
# Folder below the repository root from which challenge results are loaded.
result_path = root_path / "results"

### Output Paths

In [None]:
# Location of the paper repository, stored under the "paper_path" key of
# ``paper_path.json``. `read_text` closes the file after reading; the previous
# `json.load(path.open(...))` form left the handle open.
paper_path = Path(json.loads(root_path.joinpath("paper_path.json").read_text(encoding="utf-8"))["paper_path"])

# Export folders inside this repository.
export_path = root_path.joinpath("exports")
img_path = export_path.joinpath("plots")
stats_path = export_path.joinpath("stats")

# Export folders inside the paper repository (main text and supplementary material).
img_path_paper = paper_path.joinpath("img")
tab_path_paper = paper_path.joinpath("tab")
suppl_img_path_paper = paper_path.joinpath("supplementary_material/img")
suppl_tab_path_paper = paper_path.joinpath("supplementary_material/tab")

# Hand every folder to biopsykit's `mkdirs` helper before anything is written
# (presumably it creates the ones that are missing; see the biopsykit docs).
bp.utils.file_handling.mkdirs(
    [
        result_path,
        export_path,
        img_path,
        stats_path,
        img_path_paper,
        tab_path_paper,
        suppl_img_path_paper,
        suppl_tab_path_paper,
    ]
)

In [None]:
# Index level names of the algorithm steps, mapped to human-readable labels.
algo_level_mapping = {
    "b_point_algorithm": "B-Point Algorithm",
    "outlier_correction_algorithm": "Outlier Correction",
}
# The level names on their own, in the same order as the mapping.
algo_levels = list(algo_level_mapping)

In [None]:
# Identifier of the rater: passed as `label_type` to both datasets and used as
# the sub-folder name when loading the challenge results.
rater_id = "rater_01"

## Load EmpkinsDataset

In [None]:
# Build the EmpkinS dataset from the configured base path, using the labels
# selected by `rater_id`.
dataset_empkins = EmpkinsDataset(
    empkins_base_path,
    use_cache=True,
    only_labeled=True,
    label_type=rater_id,
)
# Trailing expression: displays the dataset as cell output.
dataset_empkins

In [None]:
# (B-point algorithm, outlier correction) pairs shown in the combined figure.
# All of them are paired with the "none" outlier correction.
selected_algos_for_plotting = [
    (b_point_algo, "none")
    for b_point_algo in (
        "debski1993-second-derivative",
        "lozano2007-linear-regression",
        "forouzanfar2018",
        "drost2022",
    )
]

In [None]:
# Load the B-point challenge results of the EmpkinS dataset for the selected rater.
results_empkins = load_challenge_results_from_folder(
    result_path / f"empkins_dataset_b_point/{rater_id}",
    index_cols_per_sample=["participant", "condition", "phase"],
)

In [None]:
# Drop the outermost index level (position 0) from the per-sample and the
# aggregated result tables.
results_per_sample_empkins = results_empkins.per_sample.droplevel(0)
results_agg_total_empkins = results_empkins.agg_total.droplevel(0)
# Trailing expression: preview of the per-sample results.
results_per_sample_empkins.head()

In [None]:
# Keep only the algorithm combinations selected for plotting, drop the outlier
# correction level, and order the rows as listed in `selected_algos_for_plotting`.
results_empkins_plot = (
    multi_xs(data=results_per_sample_empkins, keys=selected_algos_for_plotting, level=algo_levels)
    .droplevel("outlier_correction_algorithm")
    .reindex([b_point_algo for b_point_algo, _ in selected_algos_for_plotting], level=0)
)
results_empkins_plot.head()

## Load GuardianDataset

In [None]:
# Build the Guardian dataset from the configured base path, using the labels
# selected by `rater_id`.
dataset_guardian = GuardianDataset(
    guardian_base_path,
    use_cache=True,
    only_labeled=True,
    label_type=rater_id,
)
# Trailing expression: displays the dataset as cell output.
dataset_guardian

In [None]:
# Load the B-point challenge results of the Guardian dataset for the selected rater.
results_guardian = load_challenge_results_from_folder(
    result_path / f"guardian_dataset_b_point/{rater_id}",
    index_cols_per_sample=["participant", "phase"],
)

In [None]:
# Drop the outermost index level (position 0) from the per-sample and the
# aggregated result tables.
results_per_sample_guardian = results_guardian.per_sample.droplevel(0)
results_agg_total_guardian = results_guardian.agg_total.droplevel(0)
# Trailing expression: preview of the per-sample results.
results_per_sample_guardian.head()

In [None]:
# Keep only the algorithm combinations selected for plotting, drop the outlier
# correction level, and order the rows as listed in `selected_algos_for_plotting`.
results_guardian_plot = (
    multi_xs(data=results_per_sample_guardian, keys=selected_algos_for_plotting, level=algo_levels)
    .droplevel("outlier_correction_algorithm")
    .reindex([b_point_algo for b_point_algo, _ in selected_algos_for_plotting], level=0)
)
results_guardian_plot.head()

## Combined Figures

### Absolute Error - EmpkinS and Guardian

In [None]:
# One row with two panels (EmpkinS left, Guardian right) sharing the y-axis.
fig, axs = plt.subplots(ncols=2, figsize=(10, 2), sharey=True)

# Keyword arguments that are identical for both panels.
boxplot_kwargs = {
    "metric": "absolute_error_per_sample_ms",
    "showmeans": True,
    "showfliers": False,
    "width": 0.9,
    "fig": fig,
}
# Data and title of each panel, in left-to-right order.
panels = [
    (results_empkins_plot, "EmpkinS Dataset – B-Point Detection"),
    (results_guardian_plot, "Guardian Dataset – B-Point Detection"),
]
for ax, (panel_data, panel_title) in zip(axs, panels):
    boxplot_algorithm_performance(panel_data, title=panel_title, ax=ax, **boxplot_kwargs)

# Clear the x-axis label on both panels and the y-axis label on the right one.
for ax in axs:
    ax.set_xlabel(None)
axs[1].set_ylabel(None)

fig.tight_layout()

# Save the figure to the local export folder and to the paper repository.
for path in (img_path, img_path_paper):
    fig.savefig(path.joinpath("img_boxplot_b_point_algorithms_mae_combined.pdf"), transparent=True)

### `NaN` Reason Table

In [None]:
# Outlier correction algorithm names, paired with their short display names.
outlier_algo_short_names = {
    "none": "None",
    "linear-interpolation": "LinInt",
    "forouzanfar2018": "For18",
}
outlier_algos = list(outlier_algo_short_names.keys())
outlier_algos_rename = list(outlier_algo_short_names.values())

In [None]:
# NaN-reason table of the EmpkinS per-sample results for the listed outlier
# correction algorithms.
nan_reason_table_empkins = create_nan_reason_table(
    results_per_sample_empkins,
    outlier_algos=outlier_algos,
    use_short_names=True,
)
# Trailing expression: preview of the table.
nan_reason_table_empkins.head()

In [None]:
# NaN-reason table of the Guardian per-sample results for the listed outlier
# correction algorithms.
nan_reason_table_guardian = create_nan_reason_table(
    results_per_sample_guardian,
    outlier_algos=outlier_algos,
    use_short_names=True,
)
# Trailing expression: preview of the table.
nan_reason_table_guardian.head()

In [None]:
# Put both tables side by side under a new "Dataset" column level, move that
# level below the original column level, sort the columns, and keep only the
# rows whose "Outlier Correction" index level equals "None".
nan_reason_table_combined = (
    pd.concat(
        {"EmpkinS": nan_reason_table_empkins, "Guardian": nan_reason_table_guardian},
        names=["Dataset"],
        axis=1,
    )
    .reorder_levels([1, 0], axis=1)
    .sort_index(axis=1)
    .xs("None", level="Outlier Correction")
)
nan_reason_table_combined

#### To LaTeX

In [None]:
# Render the EmpkinS NaN-reason table as LaTeX. The caption ends with the list
# of abbreviations returned by `get_nan_reason_mapping()`.
latex_output = convert_to_latex(
    nan_reason_table_empkins,
    column_format="p{1.5cm}" * 2 + "p{1.0cm}" * len(nan_reason_table_empkins.columns),
    column_header_bold=True,
    escape_columns=True,
    caption=r"Overview of invalid PEP reasons for different B-point algorithms on the \textit{EmpkinS Dataset}. Abbreviations: "
    + ", ".join([rf"\textit{{{k}}}: {v}" for k, v in get_nan_reason_mapping().items()]),
    label="tab:nan_reasons_empkins",
)

# some manual post-processing
latex_output = latex_output.replace(r"\centering", r"\small\centering")
latex_output = latex_output.replace(r"{Reason}", r"{\bfseries Reason}")

# `write_text` opens, writes and closes the file in one call. The previous
# `open(mode="w+").write(...)` form never closed the handle and relied on the
# platform default encoding.
suppl_tab_path_paper.joinpath("tab_b_point_nan_reason_empkins.tex").write_text(latex_output, encoding="utf-8")
print(latex_output)

In [None]:
# Rows of the Guardian per-sample results that carry an estimated NaN reason.
nan_reason_present = ~results_per_sample_guardian[("nan_reason", "estimated")].isna()
invalid_samples_guardian = results_per_sample_guardian.loc[results_per_sample_guardian.index[nan_reason_present]]

# Restrict to the Lozano (2007) linear-regression algorithm without outlier
# correction, keep only the estimated heartbeat id column, and relabel the
# "TiltDown" phase as "TiltLevel".
heartbeat_ids_invalid = invalid_samples_guardian.loc[pd.IndexSlice["lozano2007-linear-regression", "none"]][
    [("heartbeat_id", "estimated")]
].rename({"TiltDown": "TiltLevel"}, level="phase")
heartbeat_ids_invalid

In [None]:
fig, ax = plt.subplots()

# Both plots draw the same data with the same axis mapping; only the palette differs.
heartbeat_plot_kwargs = {
    "data": heartbeat_ids_invalid.reset_index(),
    "x": "phase",
    "hue": "phase",
    "y": ("heartbeat_id", "estimated"),
    "ax": ax,
    "order": dataset_guardian.PHASES,
}
# Violin plot first, then the swarm plot of the individual samples on the same axes.
sns.violinplot(palette=cmaps.faculties_125, **heartbeat_plot_kwargs)
sns.swarmplot(palette=cmaps.faculties, **heartbeat_plot_kwargs)

ax.set_ylabel("Heartbeat ID")

fig.tight_layout()