# Explore the PEP extraction results on the Guardian Data

## Setup and helper functions

In [1]:
import json
from pathlib import Path

from pepbench.datasets import GuardianDataset
from pepbench.io import convert_hz_to_ms, load_challenge_results_from_folder

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
root_path = Path("../../")

In [3]:
deploy_type = "local"

config_dict = json.load(root_path.joinpath("config.json").open(encoding="utf-8"))

empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(guardian_base_path)

\Users\sebas\Development\ResearchInternship\Data\2024_08_PEP_Benchmarking\Guardian_Dataset


In [4]:
result_path = root_path.joinpath("results")
result_path

WindowsPath('../../results')

In [5]:
dataset_guardian = GuardianDataset(guardian_base_path, use_cache=True, only_labeled=True)
fs_guardian = dataset_guardian.sampling_rate_icg
print(f"Sampling rate ICG: {fs_guardian}")
dataset_guardian

Sampling rate ICG: 500


Unnamed: 0,participant,phase
0,GDN0005,Pause
1,GDN0005,Valsalva
2,GDN0005,HoldingBreath
3,GDN0005,TiltUp
4,GDN0005,TiltDown
...,...,...
101,GDN0029,TiltUp
102,GDN0029,TiltDown
103,GDN0030,Valsalva
104,GDN0030,HoldingBreath


In [6]:
algo_levels = ["q_wave_algorithm", "b_point_algorithm", "outlier_correction_algorithm"]
algo_level_mapping = dict(
    zip(algo_levels, ["Q-Wave Algorithm", "B-Point Algorithm", "Outlier Correction"], strict=False)
)

In [7]:
results_guardian = load_challenge_results_from_folder(
    result_path.joinpath("guardian_dataset_reference_q_wave").resolve(), index_cols_per_sample=["participant", "phase"]
)

In [8]:
results_guardian_b_point = results_guardian.per_sample.droplevel("q_wave_algorithm")[
    [("heartbeat_id", "reference"), ("b_point_sample", "estimated"), ("b_point_sample", "reference")]
]
results_guardian_b_point = results_guardian_b_point.reset_index()
results_guardian_b_point

Unnamed: 0_level_0,b_point_algorithm,outlier_correction_algorithm,participant,phase,level_4,heartbeat_id,b_point_sample,b_point_sample
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,reference,estimated,reference
0,multiple-conditions,autoregression,GDN0005,Pause,0,0,556.0,568
1,multiple-conditions,autoregression,GDN0005,Pause,1,1,1019.0,1025
2,multiple-conditions,autoregression,GDN0005,Pause,2,2,1453.0,1456
3,multiple-conditions,autoregression,GDN0005,Pause,3,3,1887.0,1888
4,multiple-conditions,autoregression,GDN0005,Pause,4,4,2314.0,2293
...,...,...,...,...,...,...,...,...
79327,third-derivative,none,GDN0030,TiltDown,51,51,27283.0,27278
79328,third-derivative,none,GDN0030,TiltDown,52,52,27824.0,27815
79329,third-derivative,none,GDN0030,TiltDown,53,53,28371.0,28345
79330,third-derivative,none,GDN0030,TiltDown,54,54,28903.0,28867


In [9]:
results_guardian_b_point.columns = results_guardian_b_point.columns.to_flat_index().str.join("")
results_guardian_b_point = results_guardian_b_point.drop(columns=["level_4"])
results_guardian_b_point

Unnamed: 0,b_point_algorithm,outlier_correction_algorithm,participant,phase,heartbeat_idreference,b_point_sampleestimated,b_point_samplereference
0,multiple-conditions,autoregression,GDN0005,Pause,0,556.0,568
1,multiple-conditions,autoregression,GDN0005,Pause,1,1019.0,1025
2,multiple-conditions,autoregression,GDN0005,Pause,2,1453.0,1456
3,multiple-conditions,autoregression,GDN0005,Pause,3,1887.0,1888
4,multiple-conditions,autoregression,GDN0005,Pause,4,2314.0,2293
...,...,...,...,...,...,...,...
79327,third-derivative,none,GDN0030,TiltDown,51,27283.0,27278
79328,third-derivative,none,GDN0030,TiltDown,52,27824.0,27815
79329,third-derivative,none,GDN0030,TiltDown,53,28371.0,28345
79330,third-derivative,none,GDN0030,TiltDown,54,28903.0,28867


In [10]:
results_guardian_b_point["b_point_algorithm_combi"] = (
    results_guardian_b_point["b_point_algorithm"] + "_" + results_guardian_b_point["outlier_correction_algorithm"]
)
results_guardian_b_point = results_guardian_b_point.drop(columns=["b_point_algorithm", "outlier_correction_algorithm"])
results_guardian_b_point.reindex(level=["participant", "phase"])

Unnamed: 0,participant,phase,heartbeat_idreference,b_point_sampleestimated,b_point_samplereference,b_point_algorithm_combi
0,GDN0005,Pause,0,556.0,568,multiple-conditions_autoregression
1,GDN0005,Pause,1,1019.0,1025,multiple-conditions_autoregression
2,GDN0005,Pause,2,1453.0,1456,multiple-conditions_autoregression
3,GDN0005,Pause,3,1887.0,1888,multiple-conditions_autoregression
4,GDN0005,Pause,4,2314.0,2293,multiple-conditions_autoregression
...,...,...,...,...,...,...
79327,GDN0030,TiltDown,51,27283.0,27278,third-derivative_none
79328,GDN0030,TiltDown,52,27824.0,27815,third-derivative_none
79329,GDN0030,TiltDown,53,28371.0,28345,third-derivative_none
79330,GDN0030,TiltDown,54,28903.0,28867,third-derivative_none


In [11]:
print("Possible B-Point algorithm combinations: ")
algo_list = results_guardian_b_point["b_point_algorithm_combi"].unique()
print(f"There are {len(algo_list)} possible algorithm combinations:")
algo_list

Possible B-Point algorithm combinations: 
There are 12 possible algorithm combinations:


array(['multiple-conditions_autoregression',
       'multiple-conditions_linear-interpolation',
       'multiple-conditions_none', 'second-derivative_autoregression',
       'second-derivative_linear-interpolation', 'second-derivative_none',
       'straight-line_autoregression',
       'straight-line_linear-interpolation', 'straight-line_none',
       'third-derivative_autoregression',
       'third-derivative_linear-interpolation', 'third-derivative_none'],
      dtype=object)

In [12]:
print(f"Number of rows in the long dataframe: {results_guardian_b_point.shape[0]}")
print(f"Number of possible B-Point algorithm combinations: {len(algo_list)}")
print(f"Expected amount of rows in the wide datframe: {results_guardian_b_point.shape[0] / len(algo_list)}")

Number of rows in the long dataframe: 79332
Number of possible B-Point algorithm combinations: 12
Expected amount of rows in the wide datframe: 6611.0


### Convert the dataframe from the long to the wide format

In [13]:
# Pivot the DataFrame
df_guardian_pivot = results_guardian_b_point.pivot_table(
    index=["participant", "phase", "heartbeat_idreference", "b_point_samplereference"],
    columns="b_point_algorithm_combi",
    values="b_point_sampleestimated",
).reset_index()

# Flatten the columns
df_guardian_pivot.columns.name = None
df_guardian_pivot.columns = [f"{col}" if isinstance(col, str) else f"{col[1]}" for col in df_guardian_pivot.columns]
df_guardian_pivot

Unnamed: 0,participant,phase,heartbeat_idreference,b_point_samplereference,multiple-conditions_autoregression,multiple-conditions_linear-interpolation,multiple-conditions_none,second-derivative_autoregression,second-derivative_linear-interpolation,second-derivative_none,straight-line_autoregression,straight-line_linear-interpolation,straight-line_none,third-derivative_autoregression,third-derivative_linear-interpolation,third-derivative_none
0,GDN0005,HoldingBreath,0,540,705.0,705.0,705.0,572.0,572.0,572.0,546.0,546.0,546.0,543.0,543.0,543.0
1,GDN0005,HoldingBreath,1,973,1068.0,1072.0,1014.0,984.0,993.0,965.0,981.0,981.0,981.0,964.0,964.0,939.0
2,GDN0005,HoldingBreath,3,1741,1757.0,1760.0,1740.0,1736.0,1736.0,1736.0,1746.0,1746.0,1746.0,1746.0,1746.0,1746.0
3,GDN0005,HoldingBreath,4,2121,2120.0,2120.0,2120.0,2114.0,2114.0,2114.0,2124.0,2124.0,2124.0,2124.0,2124.0,2124.0
4,GDN0005,HoldingBreath,5,2499,2498.0,2498.0,2498.0,2515.0,2515.0,2515.0,2502.0,2507.0,2502.0,2500.0,2500.0,2500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6239,GDN0030,Valsalva,51,27597,27582.0,27582.0,27582.0,27596.0,27596.0,27596.0,27603.0,27603.0,27603.0,27606.0,27606.0,27606.0
6240,GDN0030,Valsalva,52,28086,28074.0,28074.0,28074.0,28085.0,28085.0,28085.0,28091.0,28091.0,28091.0,28092.0,28092.0,28092.0
6241,GDN0030,Valsalva,53,28581,28595.0,28595.0,28595.0,28602.0,28602.0,28602.0,28592.0,28592.0,28592.0,28612.0,28612.0,28612.0
6242,GDN0030,Valsalva,54,29061,29058.0,29058.0,29058.0,29089.0,29089.0,29089.0,29091.0,29091.0,29091.0,29097.0,29097.0,29097.0


### Check whether differences in the B-Point samples occur between the autoregression and linear interpolation algorithm combinations

In [14]:
df_guardian_pivot[
    (
        df_guardian_pivot["multiple-conditions_autoregression"]
        == df_guardian_pivot["multiple-conditions_linear-interpolation"]
    )
    is False
]

Unnamed: 0,participant,phase,heartbeat_idreference,b_point_samplereference,multiple-conditions_autoregression,multiple-conditions_linear-interpolation,multiple-conditions_none,second-derivative_autoregression,second-derivative_linear-interpolation,second-derivative_none,straight-line_autoregression,straight-line_linear-interpolation,straight-line_none,third-derivative_autoregression,third-derivative_linear-interpolation,third-derivative_none
1,GDN0005,HoldingBreath,1,973,1068.0,1072.0,1014.0,984.0,993.0,965.0,981.0,981.0,981.0,964.0,964.0,939.0
2,GDN0005,HoldingBreath,3,1741,1757.0,1760.0,1740.0,1736.0,1736.0,1736.0,1746.0,1746.0,1746.0,1746.0,1746.0,1746.0
5,GDN0005,HoldingBreath,6,2892,2880.0,2890.0,2854.0,2878.0,2878.0,2878.0,2899.0,2899.0,2899.0,2891.0,2895.0,2857.0
7,GDN0005,HoldingBreath,9,4117,4089.0,4100.0,,4114.0,4114.0,4114.0,4122.0,4122.0,4122.0,4123.0,4123.0,4123.0
9,GDN0005,HoldingBreath,11,4920,4913.0,4916.0,4898.0,4913.0,4913.0,4913.0,4932.0,4932.0,4932.0,4934.0,4934.0,4934.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6197,GDN0030,Valsalva,9,5414,5389.0,5382.0,5369.0,5437.0,5437.0,5437.0,5418.0,5419.0,5376.0,5417.0,5417.0,5417.0
6198,GDN0030,Valsalva,10,5853,5836.0,5839.0,5820.0,5867.0,5873.0,5852.0,5864.0,5864.0,5864.0,5858.0,5860.0,5835.0
6207,GDN0030,Valsalva,19,11111,11090.0,11092.0,,11130.0,11130.0,11130.0,11104.0,11105.0,11081.0,11136.0,11136.0,11136.0
6213,GDN0030,Valsalva,25,14343,14288.0,14289.0,14264.0,14339.0,14339.0,14339.0,14321.0,14321.0,14321.0,14320.0,14320.0,14320.0


In [15]:
print(f"Max heartbeat id long dataframe: {max(results_guardian_b_point['heartbeat_idreference'])}")
print(f"Max heartbeat id wide dataframe: {max(df_guardian_pivot['heartbeat_idreference'])}")

Max heartbeat id long dataframe: 97
Max heartbeat id wide dataframe: 97


### Save the pivot dataframe

In [16]:
df_guardian_pivot

Unnamed: 0,participant,phase,heartbeat_idreference,b_point_samplereference,multiple-conditions_autoregression,multiple-conditions_linear-interpolation,multiple-conditions_none,second-derivative_autoregression,second-derivative_linear-interpolation,second-derivative_none,straight-line_autoregression,straight-line_linear-interpolation,straight-line_none,third-derivative_autoregression,third-derivative_linear-interpolation,third-derivative_none
0,GDN0005,HoldingBreath,0,540,705.0,705.0,705.0,572.0,572.0,572.0,546.0,546.0,546.0,543.0,543.0,543.0
1,GDN0005,HoldingBreath,1,973,1068.0,1072.0,1014.0,984.0,993.0,965.0,981.0,981.0,981.0,964.0,964.0,939.0
2,GDN0005,HoldingBreath,3,1741,1757.0,1760.0,1740.0,1736.0,1736.0,1736.0,1746.0,1746.0,1746.0,1746.0,1746.0,1746.0
3,GDN0005,HoldingBreath,4,2121,2120.0,2120.0,2120.0,2114.0,2114.0,2114.0,2124.0,2124.0,2124.0,2124.0,2124.0,2124.0
4,GDN0005,HoldingBreath,5,2499,2498.0,2498.0,2498.0,2515.0,2515.0,2515.0,2502.0,2507.0,2502.0,2500.0,2500.0,2500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6239,GDN0030,Valsalva,51,27597,27582.0,27582.0,27582.0,27596.0,27596.0,27596.0,27603.0,27603.0,27603.0,27606.0,27606.0,27606.0
6240,GDN0030,Valsalva,52,28086,28074.0,28074.0,28074.0,28085.0,28085.0,28085.0,28091.0,28091.0,28091.0,28092.0,28092.0,28092.0
6241,GDN0030,Valsalva,53,28581,28595.0,28595.0,28595.0,28602.0,28602.0,28602.0,28592.0,28592.0,28592.0,28612.0,28612.0,28612.0
6242,GDN0030,Valsalva,54,29061,29058.0,29058.0,29058.0,29089.0,29089.0,29089.0,29091.0,29091.0,29091.0,29097.0,29097.0,29097.0


In [17]:
# df_guardian_pivot.to_csv(result_path.joinpath("pivot_dataframe_b_point/guardian_pivot_dataframe_b_point_algos.csv"))

### Convert samples to ms

In [18]:
df_guardian_pivot_ms = df_guardian_pivot.copy()
exclude_cols = ["participant", "condition", "phase", "heartbeat_idreference"]
df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)] = df_guardian_pivot_ms.loc[
    :, ~df_guardian_pivot_ms.columns.isin(exclude_cols)
].apply(lambda x: x * convert_hz_to_ms(fs_guardian))
df_guardian_pivot_ms

  df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)] = df_guardian_pivot_ms.loc[:, ~df_guardian_pivot_ms.columns.isin(exclude_cols)].apply(lambda x: x * convert_hz_to_ms(fs_guardian))


Unnamed: 0,participant,phase,heartbeat_idreference,b_point_samplereference,multiple-conditions_autoregression,multiple-conditions_linear-interpolation,multiple-conditions_none,second-derivative_autoregression,second-derivative_linear-interpolation,second-derivative_none,straight-line_autoregression,straight-line_linear-interpolation,straight-line_none,third-derivative_autoregression,third-derivative_linear-interpolation,third-derivative_none
0,GDN0005,HoldingBreath,0,1080.0,1410.0,1410.0,1410.0,1144.0,1144.0,1144.0,1092.0,1092.0,1092.0,1086.0,1086.0,1086.0
1,GDN0005,HoldingBreath,1,1946.0,2136.0,2144.0,2028.0,1968.0,1986.0,1930.0,1962.0,1962.0,1962.0,1928.0,1928.0,1878.0
2,GDN0005,HoldingBreath,3,3482.0,3514.0,3520.0,3480.0,3472.0,3472.0,3472.0,3492.0,3492.0,3492.0,3492.0,3492.0,3492.0
3,GDN0005,HoldingBreath,4,4242.0,4240.0,4240.0,4240.0,4228.0,4228.0,4228.0,4248.0,4248.0,4248.0,4248.0,4248.0,4248.0
4,GDN0005,HoldingBreath,5,4998.0,4996.0,4996.0,4996.0,5030.0,5030.0,5030.0,5004.0,5014.0,5004.0,5000.0,5000.0,5000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6239,GDN0030,Valsalva,51,55194.0,55164.0,55164.0,55164.0,55192.0,55192.0,55192.0,55206.0,55206.0,55206.0,55212.0,55212.0,55212.0
6240,GDN0030,Valsalva,52,56172.0,56148.0,56148.0,56148.0,56170.0,56170.0,56170.0,56182.0,56182.0,56182.0,56184.0,56184.0,56184.0
6241,GDN0030,Valsalva,53,57162.0,57190.0,57190.0,57190.0,57204.0,57204.0,57204.0,57184.0,57184.0,57184.0,57224.0,57224.0,57224.0
6242,GDN0030,Valsalva,54,58122.0,58116.0,58116.0,58116.0,58178.0,58178.0,58178.0,58182.0,58182.0,58182.0,58194.0,58194.0,58194.0


### Save the converted dataframe

In [20]:
# df_guardian_pivot_ms.to_csv(result_path.joinpath("pivot_dataframe_b_point/guardian_pivot_dataframe_b_point_algos_ms.csv"))