# Inter-Rater Agreement Analysis - Guardian Dataset

## Setup and Helper Functions

### Imports

In [None]:
import json
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from fau_colors import cmaps, register_fausans_font

from pepbench.annotations import (
    compute_annotation_differences,
    load_annotations_from_dataset,
)
from pepbench.annotations.stats import bin_annotation_differences, compute_icc, describe_annotation_differences
from pepbench.datasets import GuardianDataset

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# Register the FAU font with matplotlib and close every figure that is still open.
register_fausans_font()
plt.close("all")

# Shared seaborn theme for all figures, using the light FAU faculty colors as palette.
palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Matplotlib defaults, applied after the seaborn theme so they take precedence.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        "font.sans-serif": "FAUSans Office",
    }
)

# Last expression of the cell: renders the palette as cell output.
palette

In [None]:
# Base directory (three levels above the current working directory) that the
# config files and output folders are resolved against.
root_path = Path("..") / ".." / ".."

In [None]:
# Key of the deployment-specific section of config.json that the paths are read from.
deploy_type = "local"

# Parse the config inside a context manager so the file handle is closed right after
# reading instead of staying open until it is garbage-collected.
with root_path.joinpath("config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Location of the Guardian dataset for the selected deployment.
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])

In [None]:
# Read the paper directory from paper_path.json; the context manager closes the file
# handle as soon as the JSON has been parsed.
with root_path.joinpath("paper_path.json").open(encoding="utf-8") as paper_path_file:
    paper_path = Path(json.load(paper_path_file)["paper_path"])

# Output folders below the project root.
result_path = root_path.joinpath("results")
export_path = root_path.joinpath("exports")
img_path = export_path.joinpath("plots")
stats_path = export_path.joinpath("stats")

# Output folders below the paper directory (main text and supplementary material).
img_path_paper = paper_path.joinpath("img")
tab_path_paper = paper_path.joinpath("tab")
suppl_img_path_paper = paper_path.joinpath("supplementary_material/img")
suppl_tab_path_paper = paper_path.joinpath("supplementary_material/tab")

# Pass all output folders to biopsykit's mkdirs helper so that later exports find them.
bp.utils.file_handling.mkdirs(
    [
        result_path,
        export_path,
        img_path,
        stats_path,
        img_path_paper,
        tab_path_paper,
        suppl_img_path_paper,
        suppl_tab_path_paper,
    ]
)

## Guardian Dataset

In [None]:
# Guardian dataset configured with the labels of the first rater ("rater_01").
dataset_guardian_01 = GuardianDataset(guardian_base_path, label_type="rater_01")
# Last expression of the cell: renders the dataset as cell output.
dataset_guardian_01

In [None]:
# Guardian dataset configured with the labels of the second rater ("rater_02").
dataset_guardian_02 = GuardianDataset(guardian_base_path, label_type="rater_02")
# Last expression of the cell: renders the dataset as cell output.
dataset_guardian_02

In [None]:
# Load the annotations of both rater-specific datasets into one combined object.
# NOTE(review): the exact layout of the result is defined by pepbench; the cells below
# rely on an index level named "signal".
labels_total = load_annotations_from_dataset(dataset_guardian_01, dataset_guardian_02)

In [None]:
# Split the combined annotations by signal type via the "signal" index level.
labels_ecg_total = labels_total.xs("ECG", level="signal")
labels_icg_total = labels_total.xs("ICG", level="signal")

In [None]:
# Compute the annotation differences separately for ECG and ICG, passing the sampling
# rate of the respective signal.
# NOTE(review): the plotting cells read a "difference_ms" column from these results, so
# the differences are presumably returned in milliseconds — confirm against pepbench.
labels_ecg_diff = compute_annotation_differences(
    labels_ecg_total, sampling_rate_hz=dataset_guardian_01.sampling_rate_ecg
)
labels_icg_diff = compute_annotation_differences(
    labels_icg_total, sampling_rate_hz=dataset_guardian_01.sampling_rate_icg
)

# Preview of the first rows of the ICG differences.
labels_icg_diff.head()

In [None]:
# Histogram of the ECG annotation differences; bar heights are percentages of all values.
fig, ax = plt.subplots()

# reset_index() moves the index levels into regular columns before the frame is plotted.
sns.histplot(data=labels_ecg_diff.reset_index(), x="difference_ms", ax=ax, stat="percent")

fig.tight_layout()

In [None]:
# Histogram of the ICG annotation differences; bar heights are percentages of all values.
fig, ax = plt.subplots()

sns.histplot(data=labels_icg_diff, x="difference_ms", ax=ax, stat="percent")

fig.tight_layout()

In [None]:
# Display the absolute ECG annotation differences (not stored; inspection only).
labels_ecg_diff.abs()

In [None]:
# Collects the result tables of the following cells, keyed by name, for the Excel export
# at the end of the notebook.
dict_results = {}

In [None]:
# Descriptive summary of the ECG annotation differences; stored for export and displayed.
annotation_diff_ecg = describe_annotation_differences(labels_ecg_diff)
dict_results["ECG_Annotation_Difference"] = annotation_diff_ecg
annotation_diff_ecg

In [None]:
# Descriptive summary of the ICG annotation differences; stored for export and displayed.
annotation_diff_icg = describe_annotation_differences(labels_icg_diff)
dict_results["ICG_Annotation_Difference"] = annotation_diff_icg
annotation_diff_icg

In [None]:
# Assign every annotation difference to a bin, using pepbench's default binning.
labels_ecg_bins = bin_annotation_differences(labels_ecg_diff)
labels_icg_bins = bin_annotation_differences(labels_icg_diff)

In [None]:
# Relative frequency of every agreement bin, transposed into a single-row table per signal.
ecg_bins = labels_ecg_bins.value_counts(normalize=True).to_frame().T
icg_bins = labels_icg_bins.value_counts(normalize=True).to_frame().T

# Keep both tables for the Excel export (ECG first, then ICG).
dict_results.update({"ECG_Agreement_Bins": ecg_bins, "ICG_Agreement_Bins": icg_bins})

# Show each table below a heading that names its signal.
for heading, bin_table in (("ECG", ecg_bins), ("ICG", icg_bins)):
    print(heading)
    display(bin_table)

In [None]:
# ICC of the two raters' ECG annotations; stored for export and displayed.
icc_ecg = compute_icc(labels_ecg_total, dataset_guardian_01.sampling_rate_ecg)
dict_results["ECG_ICC"] = icc_ecg
icc_ecg

In [None]:
# ICC of the two raters' ICG annotations; stored for export and displayed.
icc_icg = compute_icc(labels_icg_total, dataset_guardian_01.sampling_rate_icg)
# Store the ICG result under the ICG key (previously the ECG result was exported here).
dict_results["ICG_ICC"] = icc_icg
icc_icg

## Plots

In [None]:
# Cumulative distribution of the absolute inter-rater differences, ECG on the left and
# ICG on the right, with both axes shared between the two panels.
fig, axs = plt.subplots(ncols=2, sharex=True, sharey=True)

# ECG: count how often each absolute difference occurs, order by ascending difference and
# accumulate the counts.
labels_ecg_diff_cum = labels_ecg_diff.abs().value_counts().sort_index().cumsum()
# Normalize by the final cumulative count so the curve ends at 100 %.
labels_ecg_diff_cum = labels_ecg_diff_cum / labels_ecg_diff_cum.iloc[-1] * 100
labels_ecg_diff_cum = labels_ecg_diff_cum.to_frame("count")
# Each index entry is unpacked to its first element to obtain a flat, named index.
labels_ecg_diff_cum.index = pd.Index([i[0] for i in labels_ecg_diff_cum.index], name="Inter-Rater Difference [ms]")
# Scale the index by 1000 / sampling rate (samples -> milliseconds).
# NOTE(review): the differences were computed with `sampling_rate_hz` set and are read as
# "difference_ms" in the histogram cells, so they may already be in milliseconds and this
# line may convert them a second time — confirm against compute_annotation_differences.
labels_ecg_diff_cum.index = labels_ecg_diff_cum.index / dataset_guardian_01.sampling_rate_ecg * 1000

# ICG: same processing as for ECG (the NOTE(review) above applies here as well).
labels_icg_diff_cum = labels_icg_diff.abs().value_counts().sort_index().cumsum()
labels_icg_diff_cum = labels_icg_diff_cum / labels_icg_diff_cum.iloc[-1] * 100
labels_icg_diff_cum = labels_icg_diff_cum.to_frame("count")
labels_icg_diff_cum.index = pd.Index([i[0] for i in labels_icg_diff_cum.index], name="Inter-Rater Difference [ms]")
labels_icg_diff_cum.index = labels_icg_diff_cum.index / dataset_guardian_01.sampling_rate_icg * 1000

# Cutoff level: 80 % of the final cumulative value, truncated to an integer.
cutoff_ecg = int(0.8 * labels_ecg_diff_cum.iloc[-1].iloc[0])
# Keep the rows where the "<= cutoff" flag differs from the previous row and take the index
# label of the last one, i.e. the difference at which the curve crosses the cutoff level.
# From here on `cutoff_ecg` is a position on the x-axis, no longer a percentage.
cutoff_ecg = labels_ecg_diff_cum[(labels_ecg_diff_cum <= cutoff_ecg).diff()].dropna().iloc[-1].name

# Same cutoff lookup for ICG.
cutoff_icg = int(0.8 * labels_icg_diff_cum.iloc[-1].iloc[0])
cutoff_icg = labels_icg_diff_cum[(labels_icg_diff_cum <= cutoff_icg).diff()].dropna().iloc[-1].name

print(cutoff_ecg, cutoff_icg)

labels_ecg_diff_cum.plot(ax=axs[0])
labels_icg_diff_cum.plot(ax=axs[1])

# ECG panel: reference line at 100 %, plus vertical/horizontal guides and a marker at the
# cutoff point on the curve.
axs[0].axhline(100, color=cmaps.tech[0], ls="--")
axs[0].axvline(cutoff_ecg, color=cmaps.med[0], ls="--")
axs[0].axhline(labels_ecg_diff_cum.loc[cutoff_ecg].iloc[0], color=cmaps.med[0], ls="--")
axs[0].scatter(cutoff_ecg, labels_ecg_diff_cum.loc[cutoff_ecg].iloc[0], color=cmaps.med[0])

# ICG panel: same guides as in the ECG panel.
axs[1].axhline(100, color=cmaps.tech[0], ls="--")
axs[1].axvline(cutoff_icg, color=cmaps.med[0], ls="--")
axs[1].axhline(labels_icg_diff_cum.loc[cutoff_icg].iloc[0], color=cmaps.med[0], ls="--")
axs[1].scatter(cutoff_icg, labels_icg_diff_cum.loc[cutoff_icg].iloc[0], color=cmaps.med[0])

axs[0].set_title("ECG")
axs[1].set_title("ICG")

axs[0].set_ylabel("Cumulative Frequency [%]")
axs[1].set_ylabel("Cumulative Frequency [%]")

# Each panel shows a single curve, so the automatically created legend is removed.
axs[0].legend().remove()
axs[1].legend().remove()

fig.tight_layout()

## Export

In [None]:
# Write all collected result tables to one Excel file in the export folder.
bp.io.write_pandas_dict_excel(dict_results, export_path.joinpath("result_summary_rater_agreement_guardian.xlsx"))