# Explore the PEP extraction results on the Guardian Data

## Setup and helper functions

In [1]:
import json

from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp

import matplotlib.pyplot as plt

from pepbench.io import load_challenge_results_from_folder
from pepbench.io import convert_hz_to_ms
from pepbench.datasets import GuardianDataset

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
root_path = Path("../../")

In [3]:
deploy_type = "local"

config_dict = json.load(root_path.joinpath("config.json").open(encoding="utf-8"))

empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(guardian_base_path)

\Users\sebas\Development\ResearchInternship\Data\2024_08_PEP_Benchmarking\Guardian_Dataset


#### Specify whether to save the results or not

In [4]:
save_results = True

In [5]:
result_path = root_path.joinpath("results")
result_path

WindowsPath('../../results')

In [6]:
dataset_guardian = GuardianDataset(guardian_base_path, use_cache=True, only_labeled=True)
fs_guardian = dataset_guardian.sampling_rate_icg
print(f"Sampling rate ICG: {fs_guardian}")
dataset_guardian

Sampling rate ICG: 500


Unnamed: 0,participant,phase
0,GDN0005,Pause
1,GDN0005,Valsalva
2,GDN0005,HoldingBreath
3,GDN0005,TiltUp
4,GDN0005,TiltDown
...,...,...
101,GDN0029,TiltUp
102,GDN0029,TiltDown
103,GDN0030,Valsalva
104,GDN0030,HoldingBreath


In [7]:
algo_levels = ["q_wave_algorithm", "b_point_algorithm", "outlier_correction_algorithm"]
algo_level_mapping = dict(zip(algo_levels, ["Q-Wave Algorithm", "B-Point Algorithm", "Outlier Correction"]))

In [8]:
results_guardian = load_challenge_results_from_folder(
    result_path.joinpath("guardian_dataset_reference_q_wave").resolve(), index_cols_per_sample=["participant", "phase"]
)

In [9]:
results_guardian_b_point = results_guardian.per_sample.droplevel("q_wave_algorithm")[[("heartbeat_id", "reference"), ("b_point_sample", "estimated"), ("b_point_sample", "reference")]]
results_guardian_b_point = results_guardian_b_point.reset_index()
results_guardian_b_point

Unnamed: 0_level_0,b_point_algorithm,outlier_correction_algorithm,participant,phase,level_4,heartbeat_id,b_point_sample,b_point_sample
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,reference,estimated,reference
0,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,0,0,558.0,568
1,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,1,1,1009.0,1025
2,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,2,2,1463.0,1456
3,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,3,3,1900.0,1888
4,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,4,4,2320.0,2293
...,...,...,...,...,...,...,...,...
198325,stern1985,none,GDN0030,TiltDown,51,51,27277.0,27278
198326,stern1985,none,GDN0030,TiltDown,52,52,27788.0,27815
198327,stern1985,none,GDN0030,TiltDown,53,53,28345.0,28345
198328,stern1985,none,GDN0030,TiltDown,54,54,28846.0,28867


In [10]:
results_guardian_b_point.columns = results_guardian_b_point.columns.to_flat_index().str.join("")
results_guardian_b_point = results_guardian_b_point.drop(columns=["level_4"])
results_guardian_b_point

Unnamed: 0,b_point_algorithm,outlier_correction_algorithm,participant,phase,heartbeat_idreference,b_point_sampleestimated,b_point_samplereference
0,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,0,558.0,568
1,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,1,1009.0,1025
2,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,2,1463.0,1456
3,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,3,1900.0,1888
4,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,4,2320.0,2293
...,...,...,...,...,...,...,...
198325,stern1985,none,GDN0030,TiltDown,51,27277.0,27278
198326,stern1985,none,GDN0030,TiltDown,52,27788.0,27815
198327,stern1985,none,GDN0030,TiltDown,53,28345.0,28345
198328,stern1985,none,GDN0030,TiltDown,54,28846.0,28867


In [11]:
results_guardian_b_point["b_point_algorithm_combi"] = results_guardian_b_point["b_point_algorithm"] + "_" + results_guardian_b_point["outlier_correction_algorithm"]
results_guardian_b_point = results_guardian_b_point.drop(columns=["b_point_algorithm", "outlier_correction_algorithm"])
results_guardian_b_point.reindex(level=["participant", "phase"])

Unnamed: 0,participant,phase,heartbeat_idreference,b_point_sampleestimated,b_point_samplereference,b_point_algorithm_combi
0,GDN0005,Pause,0,558.0,568,arbol2017-isoelectric-crossings_forouzanfar2018
1,GDN0005,Pause,1,1009.0,1025,arbol2017-isoelectric-crossings_forouzanfar2018
2,GDN0005,Pause,2,1463.0,1456,arbol2017-isoelectric-crossings_forouzanfar2018
3,GDN0005,Pause,3,1900.0,1888,arbol2017-isoelectric-crossings_forouzanfar2018
4,GDN0005,Pause,4,2320.0,2293,arbol2017-isoelectric-crossings_forouzanfar2018
...,...,...,...,...,...,...
198325,GDN0030,TiltDown,51,27277.0,27278,stern1985_none
198326,GDN0030,TiltDown,52,27788.0,27815,stern1985_none
198327,GDN0030,TiltDown,53,28345.0,28345,stern1985_none
198328,GDN0030,TiltDown,54,28846.0,28867,stern1985_none


In [12]:
print("Possible B-Point algorithm combinations: ")
algo_list = results_guardian_b_point["b_point_algorithm_combi"].unique()
print(f"There are {len(algo_list)} possible algorithm combinations:")
algo_list

Possible B-Point algorithm combinations: 
There are 30 possible algorithm combinations:


array(['arbol2017-isoelectric-crossings_forouzanfar2018',
       'arbol2017-isoelectric-crossings_linear-interpolation',
       'arbol2017-isoelectric-crossings_none',
       'arbol2017-second-derivative_forouzanfar2018',
       'arbol2017-second-derivative_linear-interpolation',
       'arbol2017-second-derivative_none',
       'arbol2017-third-derivative_forouzanfar2018',
       'arbol2017-third-derivative_linear-interpolation',
       'arbol2017-third-derivative_none',
       'debski1993-second-derivative_forouzanfar2018',
       'debski1993-second-derivative_linear-interpolation',
       'debski1993-second-derivative_none', 'drost2022_forouzanfar2018',
       'drost2022_linear-interpolation', 'drost2022_none',
       'forounzafar2018_forouzanfar2018',
       'forounzafar2018_linear-interpolation', 'forounzafar2018_none',
       'lozano2007-linear-regression_forouzanfar2018',
       'lozano2007-linear-regression_linear-interpolation',
       'lozano2007-linear-regression_none',
    

In [13]:
print(f"Number of rows in the long dataframe: {results_guardian_b_point.shape[0]}")
print(f"Number of possible B-Point algorithm combinations: {len(algo_list)}")
print(f"Expected amount of rows in the wide datframe: {results_guardian_b_point.shape[0]/len(algo_list)}")

Number of rows in the long dataframe: 198330
Number of possible B-Point algorithm combinations: 30
Expected amount of rows in the wide datframe: 6611.0


### Convert the dataframe from the long to the wide format

In [14]:
# Pivot the DataFrame
df_guardian_pivot = results_guardian_b_point.pivot_table(
    index=['participant', 'phase', 'heartbeat_idreference', 'b_point_samplereference'],
    columns='b_point_algorithm_combi',
    values='b_point_sampleestimated'
).reset_index()

# Flatten the columns
df_guardian_pivot.columns.name = None
df_guardian_pivot.columns = [f'{col}' if isinstance(col, str) else f'{col[1]}' for col in df_guardian_pivot.columns]
df_guardian_pivot

Unnamed: 0,participant,phase,heartbeat_idreference,b_point_samplereference,arbol2017-isoelectric-crossings_forouzanfar2018,arbol2017-isoelectric-crossings_linear-interpolation,arbol2017-isoelectric-crossings_none,arbol2017-second-derivative_forouzanfar2018,arbol2017-second-derivative_linear-interpolation,arbol2017-second-derivative_none,...,lozano2007-linear-regression_none,lozano2007-quadratic-regression_forouzanfar2018,lozano2007-quadratic-regression_linear-interpolation,lozano2007-quadratic-regression_none,sherwood1990_forouzanfar2018,sherwood1990_linear-interpolation,sherwood1990_none,stern1985_forouzanfar2018,stern1985_linear-interpolation,stern1985_none
0,GDN0005,HoldingBreath,0,540,565.0,565.0,565.0,545.0,545.0,545.0,...,552.0,538.0,538.0,538.0,567.0,567.0,567.0,540.0,540.0,540.0
1,GDN0005,HoldingBreath,1,973,981.0,982.0,941.0,946.0,946.0,946.0,...,973.0,963.0,963.0,963.0,982.0,981.0,936.0,972.0,972.0,972.0
2,GDN0005,HoldingBreath,3,1741,1744.0,1744.0,1744.0,1701.0,1701.0,1701.0,...,1736.0,1727.0,1727.0,1727.0,1744.0,1744.0,1744.0,1740.0,1740.0,1740.0
3,GDN0005,HoldingBreath,4,2121,2123.0,2123.0,2123.0,2098.0,2098.0,2098.0,...,2112.0,2103.0,2105.0,2100.0,2123.0,2123.0,2123.0,2120.0,2120.0,2120.0
4,GDN0005,HoldingBreath,5,2499,2505.0,2505.0,2505.0,2462.0,2462.0,2462.0,...,2495.0,2486.0,2489.0,2483.0,2506.0,2506.0,2506.0,2498.0,2498.0,2498.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6239,GDN0030,Valsalva,51,27597,27581.0,27581.0,27581.0,27554.0,27554.0,27554.0,...,27588.0,27580.0,27580.0,27580.0,27579.0,27579.0,27579.0,27571.0,27571.0,27571.0
6240,GDN0030,Valsalva,52,28086,28073.0,28073.0,28073.0,28043.0,28043.0,28043.0,...,28078.0,28069.0,28069.0,28069.0,28072.0,28072.0,28072.0,28062.0,28062.0,28062.0
6241,GDN0030,Valsalva,53,28581,28591.0,28591.0,28591.0,28557.0,28557.0,28557.0,...,28593.0,28586.0,28586.0,28586.0,28594.0,28594.0,28594.0,28580.0,28580.0,28580.0
6242,GDN0030,Valsalva,54,29061,29074.0,29074.0,29074.0,29045.0,29045.0,29045.0,...,29081.0,29072.0,29072.0,29072.0,29069.0,29081.0,29069.0,29062.0,29063.0,29033.0


In [15]:
print(f"Max heartbeat id long dataframe: {max(results_guardian_b_point["heartbeat_idreference"])}")
print(f"Max heartbeat id wide dataframe: {max(df_guardian_pivot["heartbeat_idreference"])}")

Max heartbeat id long dataframe: 97
Max heartbeat id wide dataframe: 97


### Save the pivot dataframe

In [16]:
if save_results:
    df_guardian_pivot.to_csv(result_path.joinpath("pivot_dataframe_b_point/guardian_pivot_dataframe_b_point_all_algos.csv"))

### Convert samples to ms

In [17]:
df_guardian_pivot_ms = df_guardian_pivot.copy()
exclude_cols = ["participant", "condition", "phase", "heartbeat_idreference"]
df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)] = df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)].apply(lambda x: x * convert_hz_to_ms(fs_guardian))
df_guardian_pivot_ms                                                                                  

  df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)] = df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)].apply(lambda x: x * convert_hz_to_ms(fs_guardian))


Unnamed: 0,participant,phase,heartbeat_idreference,b_point_samplereference,arbol2017-isoelectric-crossings_forouzanfar2018,arbol2017-isoelectric-crossings_linear-interpolation,arbol2017-isoelectric-crossings_none,arbol2017-second-derivative_forouzanfar2018,arbol2017-second-derivative_linear-interpolation,arbol2017-second-derivative_none,...,lozano2007-linear-regression_none,lozano2007-quadratic-regression_forouzanfar2018,lozano2007-quadratic-regression_linear-interpolation,lozano2007-quadratic-regression_none,sherwood1990_forouzanfar2018,sherwood1990_linear-interpolation,sherwood1990_none,stern1985_forouzanfar2018,stern1985_linear-interpolation,stern1985_none
0,GDN0005,HoldingBreath,0,1080.0,1130.0,1130.0,1130.0,1090.0,1090.0,1090.0,...,1104.0,1076.0,1076.0,1076.0,1134.0,1134.0,1134.0,1080.0,1080.0,1080.0
1,GDN0005,HoldingBreath,1,1946.0,1962.0,1964.0,1882.0,1892.0,1892.0,1892.0,...,1946.0,1926.0,1926.0,1926.0,1964.0,1962.0,1872.0,1944.0,1944.0,1944.0
2,GDN0005,HoldingBreath,3,3482.0,3488.0,3488.0,3488.0,3402.0,3402.0,3402.0,...,3472.0,3454.0,3454.0,3454.0,3488.0,3488.0,3488.0,3480.0,3480.0,3480.0
3,GDN0005,HoldingBreath,4,4242.0,4246.0,4246.0,4246.0,4196.0,4196.0,4196.0,...,4224.0,4206.0,4210.0,4200.0,4246.0,4246.0,4246.0,4240.0,4240.0,4240.0
4,GDN0005,HoldingBreath,5,4998.0,5010.0,5010.0,5010.0,4924.0,4924.0,4924.0,...,4990.0,4972.0,4978.0,4966.0,5012.0,5012.0,5012.0,4996.0,4996.0,4996.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6239,GDN0030,Valsalva,51,55194.0,55162.0,55162.0,55162.0,55108.0,55108.0,55108.0,...,55176.0,55160.0,55160.0,55160.0,55158.0,55158.0,55158.0,55142.0,55142.0,55142.0
6240,GDN0030,Valsalva,52,56172.0,56146.0,56146.0,56146.0,56086.0,56086.0,56086.0,...,56156.0,56138.0,56138.0,56138.0,56144.0,56144.0,56144.0,56124.0,56124.0,56124.0
6241,GDN0030,Valsalva,53,57162.0,57182.0,57182.0,57182.0,57114.0,57114.0,57114.0,...,57186.0,57172.0,57172.0,57172.0,57188.0,57188.0,57188.0,57160.0,57160.0,57160.0
6242,GDN0030,Valsalva,54,58122.0,58148.0,58148.0,58148.0,58090.0,58090.0,58090.0,...,58162.0,58144.0,58144.0,58144.0,58138.0,58162.0,58138.0,58124.0,58126.0,58066.0


### Save the converted dataframe

In [18]:
if save_results:
    df_guardian_pivot_ms.to_csv(result_path.joinpath("pivot_dataframe_b_point/guardian_pivot_dataframe_b_point_all_algos_ms.csv"))