In [1]:
import json

from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp

import matplotlib.pyplot as plt

from pepbench.io import load_challenge_results_from_folder
from pepbench.io import convert_hz_to_ms
from pepbench.datasets import EmpkinsDataset, GuardianDataset

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory, two levels above the working directory; config.json and the
# "results" folder are looked up relative to it.
root_path = Path("../../")

In [3]:
# Folder from which the challenge results are loaded and below which the final
# CSV export is written.
result_path = root_path.joinpath("results")
result_path

WindowsPath('../../results')

In [4]:
# Key of the config.json section that supplies the dataset locations.
deploy_type = "local"

# Read the whole file via ``read_text`` so the file handle is closed right away;
# passing an un-managed ``open()`` result to ``json.load`` would leave it open
# until garbage collection.
config_dict = json.loads(root_path.joinpath("config.json").read_text(encoding="utf-8"))

# Dataset base folders for the selected deployment.
empkins_base_path = Path(config_dict[deploy_type]["empkins_path"])
guardian_base_path = Path(config_dict[deploy_type]["guardian_path"])
print(empkins_base_path)

\Users\sebas\Development\ResearchInternship\Data\2024_08_PEP_Benchmarking\EmpkinS_Dataset


In [5]:
# Switch for the final cell: when True, the wide training table is written to CSV.
save_results = True

In [6]:
# Instantiate both datasets (cached, restricted to labeled data via the flags below).
dataset_empkins = EmpkinsDataset(empkins_base_path, use_cache=True, only_labeled=True)
dataset_guardian = GuardianDataset(guardian_base_path, use_cache=True, only_labeled=True)

# ICG sampling rates; they are needed later to scale sample indices per dataset.
fs_empkins = dataset_empkins.sampling_rate_icg
fs_guardian = dataset_guardian.sampling_rate_icg

# Report EmpkinS first, then Guardian.
print(f"Sampling rate ICG: {fs_empkins}")
print(f"Sampling rate ICG: {fs_guardian}")

Sampling rate ICG: 1000
Sampling rate ICG: 500


In [7]:
# Index level names that identify an algorithm combination, mapped to the
# human-readable labels used for display.
algo_level_mapping = {
    "q_wave_algorithm": "Q-Wave Algorithm",
    "b_point_algorithm": "B-Point Algorithm",
    "outlier_correction_algorithm": "Outlier Correction",
}
# The level names alone, in the same order as the mapping.
algo_levels = list(algo_level_mapping)

In [8]:
# Per-sample challenge results of the EmpkinS dataset; the per-sample index
# carries participant, condition and phase.
results_empkins = load_challenge_results_from_folder(
    (result_path / "empkins_dataset_reference_q_wave").resolve(),
    index_cols_per_sample=["participant", "condition", "phase"],
).per_sample

# Per-sample challenge results of the Guardian dataset; there is no condition
# level here, only participant and phase.
results_guardian = load_challenge_results_from_folder(
    (result_path / "guardian_dataset_reference_q_wave").resolve(),
    index_cols_per_sample=["participant", "phase"],
).per_sample

In [9]:
# Remove the "condition" index level so the EmpkinS index uses the same
# participant/phase levels that were requested for the Guardian results.
# The membership check keeps the cell idempotent: re-running it after the level
# is already gone would otherwise raise a KeyError.
if "condition" in results_empkins.index.names:
    results_empkins = results_empkins.droplevel("condition")

In [10]:
# Express the B-point positions relative to the start of their heartbeat.
# Both the estimated and the reference B-point are shifted by the *estimated*
# heartbeat start, i.e. they share one common origin per heartbeat.
heartbeat_start_col = ("heartbeat_start_sample", "estimated")

results_empkins_norm = results_empkins.copy()
results_guardian_norm = results_guardian.copy()

for b_point_col in [("b_point_sample", "estimated"), ("b_point_sample", "reference")]:
    results_empkins_norm[b_point_col] = results_empkins[b_point_col] - results_empkins[heartbeat_start_col]
    results_guardian_norm[b_point_col] = results_guardian[b_point_col] - results_guardian[heartbeat_start_col]

In [11]:
# Inspect the Guardian results after the B-point columns were made relative to
# the estimated heartbeat start.
results_guardian_norm

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,heartbeat_id,heartbeat_id,heartbeat_start_sample,heartbeat_start_sample,heartbeat_end_sample,heartbeat_end_sample,q_wave_onset_sample,q_wave_onset_sample,b_point_sample,b_point_sample,rr_interval_ms,pep_sample,pep_sample,pep_ms,pep_ms,nan_reason,nan_reason,error_per_sample_ms,absolute_error_per_sample_ms,absolute_relative_error_per_sample_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,estimated,reference,estimated,reference,estimated,reference,metric,metric,metric
q_wave_algorithm,b_point_algorithm,outlier_correction_algorithm,participant,phase,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,0,0,0,352,352,808,808,492,492,206.0,216,912.0,66.0,76,132.0,152,,,20.0,20.0,13.157895
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,1,1,1,808,808,1252,1252,948,948,201.0,217,874.0,61.0,77,122.0,154,,,32.0,32.0,20.779221
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,2,2,2,1252,1252,1686,1686,1386,1386,211.0,204,864.0,77.0,70,154.0,140,,,-14.0,14.0,10.000000
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,3,3,3,1686,1686,2116,2115,1817,1817,214.0,202,858.0,83.0,71,166.0,142,,,-24.0,24.0,16.901408
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,4,4,4,2116,2115,2570,2570,2246,2246,204.0,177,936.0,74.0,47,148.0,94,,,-54.0,54.0,57.446809
q-wave-reference,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
q-wave-reference,stern1985,none,GDN0030,TiltDown,51,51,51,27029,27029,27570,27570,27200,27200,248.0,249,1080.0,77.0,78,154.0,156,,,2.0,2.0,1.282051
q-wave-reference,stern1985,none,GDN0030,TiltDown,52,52,52,27570,27570,28118,28118,27740,27740,218.0,245,1106.0,48.0,75,96.0,150,,,54.0,54.0,36.000000
q-wave-reference,stern1985,none,GDN0030,TiltDown,53,53,53,28118,28118,28652,28652,28293,28293,227.0,227,1046.0,52.0,52,104.0,104,,,0.0,0.0,0.000000
q-wave-reference,stern1985,none,GDN0030,TiltDown,54,54,54,28652,28652,29167,29167,28816,28816,194.0,215,1022.0,30.0,51,60.0,102,,,42.0,42.0,41.176471


In [12]:
# All sample-based columns (estimated and reference variant of each quantity)
# that get scaled by the dataset-specific conversion factor.
sample_quantities = [
    "heartbeat_start_sample",
    "heartbeat_end_sample",
    "q_wave_onset_sample",
    "b_point_sample",
    "pep_sample",
]
cols = [(quantity, source) for quantity in sample_quantities for source in ("estimated", "reference")]

# EmpkinS: scale with the factor derived from the EmpkinS ICG sampling rate.
# The column names keep their "_sample" suffix even though the values are rescaled.
results_empkins_cleaned_ms = results_empkins_norm.copy()
results_empkins_cleaned_ms[cols] = results_empkins_norm[cols] * convert_hz_to_ms(sampling_frequency=fs_empkins)

# Guardian: same procedure with the Guardian ICG sampling rate.
results_guardian_cleaned_ms = results_guardian_norm.copy()
results_guardian_cleaned_ms[cols] = results_guardian_norm[cols] * convert_hz_to_ms(sampling_frequency=fs_guardian)

In [13]:
# Inspect the Guardian results after the sample-based columns were scaled with
# the Guardian conversion factor.
results_guardian_cleaned_ms

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,heartbeat_id,heartbeat_id,heartbeat_start_sample,heartbeat_start_sample,heartbeat_end_sample,heartbeat_end_sample,q_wave_onset_sample,q_wave_onset_sample,b_point_sample,b_point_sample,rr_interval_ms,pep_sample,pep_sample,pep_ms,pep_ms,nan_reason,nan_reason,error_per_sample_ms,absolute_error_per_sample_ms,absolute_relative_error_per_sample_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,estimated,reference,estimated,reference,estimated,reference,metric,metric,metric
q_wave_algorithm,b_point_algorithm,outlier_correction_algorithm,participant,phase,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,0,0,0,704.0,704.0,1616.0,1616.0,984.0,984.0,412.0,432.0,912.0,132.0,152.0,132.0,152,,,20.0,20.0,13.157895
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,1,1,1,1616.0,1616.0,2504.0,2504.0,1896.0,1896.0,402.0,434.0,874.0,122.0,154.0,122.0,154,,,32.0,32.0,20.779221
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,2,2,2,2504.0,2504.0,3372.0,3372.0,2772.0,2772.0,422.0,408.0,864.0,154.0,140.0,154.0,140,,,-14.0,14.0,10.000000
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,3,3,3,3372.0,3372.0,4232.0,4230.0,3634.0,3634.0,428.0,404.0,858.0,166.0,142.0,166.0,142,,,-24.0,24.0,16.901408
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,GDN0005,Pause,4,4,4,4232.0,4230.0,5140.0,5140.0,4492.0,4492.0,408.0,354.0,936.0,148.0,94.0,148.0,94,,,-54.0,54.0,57.446809
q-wave-reference,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
q-wave-reference,stern1985,none,GDN0030,TiltDown,51,51,51,54058.0,54058.0,55140.0,55140.0,54400.0,54400.0,496.0,498.0,1080.0,154.0,156.0,154.0,156,,,2.0,2.0,1.282051
q-wave-reference,stern1985,none,GDN0030,TiltDown,52,52,52,55140.0,55140.0,56236.0,56236.0,55480.0,55480.0,436.0,490.0,1106.0,96.0,150.0,96.0,150,,,54.0,54.0,36.000000
q-wave-reference,stern1985,none,GDN0030,TiltDown,53,53,53,56236.0,56236.0,57304.0,57304.0,56586.0,56586.0,454.0,454.0,1046.0,104.0,104.0,104.0,104,,,0.0,0.0,0.000000
q-wave-reference,stern1985,none,GDN0030,TiltDown,54,54,54,57304.0,57304.0,58334.0,58334.0,57632.0,57632.0,388.0,430.0,1022.0,60.0,102.0,60.0,102,,,42.0,42.0,41.176471


In [14]:
# Stack the scaled EmpkinS and Guardian frames row-wise (EmpkinS rows first)
# into one combined frame.
preprocessed_results_empkins_guardian = pd.concat([results_empkins_cleaned_ms, results_guardian_cleaned_ms])
preprocessed_results_empkins_guardian

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,heartbeat_id,heartbeat_id,heartbeat_start_sample,heartbeat_start_sample,heartbeat_end_sample,heartbeat_end_sample,q_wave_onset_sample,q_wave_onset_sample,b_point_sample,b_point_sample,rr_interval_ms,pep_sample,pep_sample,pep_ms,pep_ms,nan_reason,nan_reason,error_per_sample_ms,absolute_error_per_sample_ms,absolute_relative_error_per_sample_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,estimated,reference,estimated,reference,estimated,reference,metric,metric,metric
q_wave_algorithm,b_point_algorithm,outlier_correction_algorithm,participant,phase,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,VP_001,Prep,0,0,0,394.0,399.0,984.0,985.0,567.0,567.0,275.0,240.0,590.0,102.0,67.0,102.0,67,,,-35.0,35.0,52.238806
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,VP_001,Prep,1,1,1,984.0,985.0,1569.0,1569.0,1156.0,1156.0,254.0,222.0,581.0,82.0,50.0,82.0,50,,,-32.0,32.0,64.000000
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,VP_001,Prep,2,2,2,1569.0,1569.0,2134.0,2134.0,1735.0,1735.0,265.0,242.0,557.0,99.0,76.0,99.0,76,,,-23.0,23.0,30.263158
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,VP_001,Prep,3,3,3,2134.0,2134.0,2684.0,2684.0,2291.0,2291.0,260.0,248.0,546.0,103.0,91.0,103.0,91,,,-12.0,12.0,13.186813
q-wave-reference,arbol2017-isoelectric-crossings,forouzanfar2018,VP_001,Prep,4,4,4,2684.0,2684.0,3227.0,3227.0,2846.0,2846.0,259.0,242.0,541.0,97.0,80.0,97.0,80,,,-17.0,17.0,21.250000
q-wave-reference,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
q-wave-reference,stern1985,none,GDN0030,TiltDown,51,51,51,54058.0,54058.0,55140.0,55140.0,54400.0,54400.0,496.0,498.0,1080.0,154.0,156.0,154.0,156,,,2.0,2.0,1.282051
q-wave-reference,stern1985,none,GDN0030,TiltDown,52,52,52,55140.0,55140.0,56236.0,56236.0,55480.0,55480.0,436.0,490.0,1106.0,96.0,150.0,96.0,150,,,54.0,54.0,36.000000
q-wave-reference,stern1985,none,GDN0030,TiltDown,53,53,53,56236.0,56236.0,57304.0,57304.0,56586.0,56586.0,454.0,454.0,1046.0,104.0,104.0,104.0,104,,,0.0,0.0,0.000000
q-wave-reference,stern1985,none,GDN0030,TiltDown,54,54,54,57304.0,57304.0,58334.0,58334.0,57632.0,57632.0,388.0,430.0,1022.0,60.0,102.0,60.0,102,,,42.0,42.0,41.176471


### Check whether the concatenated dataframe has the same size as the summed non-concatenated dataframes

In [15]:
# Row-count sanity check: prints 0 when the combined frame has exactly as many
# rows as the two per-dataset frames together.
print(preprocessed_results_empkins_guardian.shape[0] - (results_empkins_cleaned_ms.shape[0] + results_guardian_cleaned_ms.shape[0]))

0


### Data with or without outlier correction

In [16]:
# True: keep only rows whose outlier_correction_algorithm is 'none'.
# False: keep every B-point/outlier-correction combination.
drop_outlier_correction = True

In [17]:
# Either restrict the data to the runs without outlier correction (which also
# removes that index level) or pass the combined frame through unchanged.
input_data = (
    preprocessed_results_empkins_guardian.xs(key='none', level='outlier_correction_algorithm', drop_level=True)
    if drop_outlier_correction
    else preprocessed_results_empkins_guardian
)

In [18]:
# Inspect the data selected by the outlier-correction switch.
input_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,heartbeat_id,heartbeat_id,heartbeat_start_sample,heartbeat_start_sample,heartbeat_end_sample,heartbeat_end_sample,q_wave_onset_sample,q_wave_onset_sample,b_point_sample,b_point_sample,rr_interval_ms,pep_sample,pep_sample,pep_ms,pep_ms,nan_reason,nan_reason,error_per_sample_ms,absolute_error_per_sample_ms,absolute_relative_error_per_sample_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,reference,estimated,estimated,reference,estimated,reference,estimated,reference,metric,metric,metric
q_wave_algorithm,b_point_algorithm,participant,phase,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,0,0,0,394.0,399.0,984.0,985.0,567.0,567.0,275.0,240.0,590.0,102.0,67.0,102.0,67,,,-35.0,35.0,52.238806
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,1,1,1,984.0,985.0,1569.0,1569.0,1156.0,1156.0,254.0,222.0,581.0,82.0,50.0,82.0,50,,,-32.0,32.0,64.000000
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,2,2,2,1569.0,1569.0,2134.0,2134.0,1735.0,1735.0,265.0,242.0,557.0,99.0,76.0,99.0,76,,,-23.0,23.0,30.263158
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,3,3,3,2134.0,2134.0,2684.0,2684.0,2291.0,2291.0,260.0,248.0,546.0,103.0,91.0,103.0,91,,,-12.0,12.0,13.186813
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,4,4,4,2684.0,2684.0,3227.0,3227.0,2846.0,2846.0,259.0,242.0,541.0,97.0,80.0,97.0,80,,,-17.0,17.0,21.250000
q-wave-reference,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
q-wave-reference,stern1985,GDN0030,TiltDown,51,51,51,54058.0,54058.0,55140.0,55140.0,54400.0,54400.0,496.0,498.0,1080.0,154.0,156.0,154.0,156,,,2.0,2.0,1.282051
q-wave-reference,stern1985,GDN0030,TiltDown,52,52,52,55140.0,55140.0,56236.0,56236.0,55480.0,55480.0,436.0,490.0,1106.0,96.0,150.0,96.0,150,,,54.0,54.0,36.000000
q-wave-reference,stern1985,GDN0030,TiltDown,53,53,53,56236.0,56236.0,57304.0,57304.0,56586.0,56586.0,454.0,454.0,1046.0,104.0,104.0,104.0,104,,,0.0,0.0,0.000000
q-wave-reference,stern1985,GDN0030,TiltDown,54,54,54,57304.0,57304.0,58334.0,58334.0,57632.0,57632.0,388.0,430.0,1022.0,60.0,102.0,60.0,102,,,42.0,42.0,41.176471


In [19]:
# Columns required for the training table: heartbeat id, estimated and
# reference B-point, and the RR interval.
selected_columns = [
    ("heartbeat_id", "reference"),
    ("b_point_sample", "estimated"),
    ("b_point_sample", "reference"),
    ("rr_interval_ms", "estimated"),
]
input_data = input_data[selected_columns]

# Collapse the two header levels into single names such as
# "b_point_sample_estimated".
input_data.columns = input_data.columns.to_flat_index().str.join("_")
input_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,heartbeat_id_reference,b_point_sample_estimated,b_point_sample_reference,rr_interval_ms_estimated
q_wave_algorithm,b_point_algorithm,participant,phase,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,0,0,275.0,240.0,590.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,1,1,254.0,222.0,581.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,2,2,265.0,242.0,557.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,3,3,260.0,248.0,546.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,4,4,259.0,242.0,541.0
q-wave-reference,...,...,...,...,...,...,...,...
q-wave-reference,stern1985,GDN0030,TiltDown,51,51,496.0,498.0,1080.0
q-wave-reference,stern1985,GDN0030,TiltDown,52,52,436.0,490.0,1106.0
q-wave-reference,stern1985,GDN0030,TiltDown,53,53,454.0,454.0,1046.0
q-wave-reference,stern1985,GDN0030,TiltDown,54,54,388.0,430.0,1022.0


In [20]:
# Spot check: column-wise maxima over all rows of participant VP_022.
input_data.xs(key='VP_022', level='participant').max()

heartbeat_id_reference       39.0
b_point_sample_estimated    470.0
b_point_sample_reference    461.0
rr_interval_ms_estimated    952.0
dtype: float64

In [21]:
# When the outlier-correction variants are kept, every row is identified by the
# pair (B-point algorithm, outlier-correction algorithm). Merge both index
# levels into one "algo_combis" label so the table can later be pivoted on a
# single column.
if not drop_outlier_correction:
    index = input_data.index
    algo_combis = index.get_level_values("b_point_algorithm") + "_" + index.get_level_values("outlier_correction_algorithm")
    # ``assign`` returns a new frame instead of writing a column into a frame
    # that was derived by selection, which avoids pandas' SettingWithCopyWarning.
    input_data = (
        input_data.assign(algo_combis=algo_combis)
        .reset_index()
        .drop(columns=["b_point_algorithm", "outlier_correction_algorithm"])
    )
input_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,heartbeat_id_reference,b_point_sample_estimated,b_point_sample_reference,rr_interval_ms_estimated
q_wave_algorithm,b_point_algorithm,participant,phase,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,0,0,275.0,240.0,590.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,1,1,254.0,222.0,581.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,2,2,265.0,242.0,557.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,3,3,260.0,248.0,546.0
q-wave-reference,arbol2017-isoelectric-crossings,VP_001,Prep,4,4,259.0,242.0,541.0
q-wave-reference,...,...,...,...,...,...,...,...
q-wave-reference,stern1985,GDN0030,TiltDown,51,51,496.0,498.0,1080.0
q-wave-reference,stern1985,GDN0030,TiltDown,52,52,436.0,490.0,1106.0
q-wave-reference,stern1985,GDN0030,TiltDown,53,53,454.0,454.0,1046.0
q-wave-reference,stern1985,GDN0030,TiltDown,54,54,388.0,430.0,1022.0


In [30]:
# Field whose values become the columns of the wide table: the plain B-point
# algorithm name, or the combined label when outlier correction is kept.
columns = "b_point_algorithm" if drop_outlier_correction else "algo_combis"

# Keys that identify one row (one heartbeat) of the wide table.
pivot_index = [
    'participant',
    'phase',
    'heartbeat_id_reference',
    'b_point_sample_reference',
    'rr_interval_ms_estimated',
]

# Long -> wide: one column of estimated B-points per algorithm.
input_data_wide = input_data.pivot_table(
    index=pivot_index,
    columns=columns,
    values='b_point_sample_estimated',
)
input_data_wide = input_data_wide.reset_index()

# Remove the header name and make sure every column label is a plain string
# (for tuple labels the second element is used).
input_data_wide.columns.name = None
input_data_wide.columns = [
    f'{label}' if isinstance(label, str) else f'{label[1]}'
    for label in input_data_wide.columns
]
input_data_wide

Unnamed: 0,participant,phase,heartbeat_id_reference,b_point_sample_reference,rr_interval_ms_estimated,arbol2017-isoelectric-crossings,arbol2017-second-derivative,arbol2017-third-derivative,debski1993-second-derivative,drost2022,forounzafar2018,lozano2007-linear-regression,lozano2007-quadratic-regression,sherwood1990,stern1985
0,GDN0005,HoldingBreath,0,388.0,850.0,438.0,398.0,394.0,452.0,400.0,550.0,412.0,384.0,442.0,388.0
1,GDN0005,HoldingBreath,1,404.0,778.0,340.0,350.0,244.0,388.0,420.0,402.0,404.0,384.0,330.0,402.0
2,GDN0005,HoldingBreath,3,376.0,746.0,382.0,296.0,386.0,366.0,386.0,388.0,366.0,348.0,382.0,374.0
3,GDN0005,HoldingBreath,4,390.0,766.0,394.0,344.0,396.0,376.0,396.0,398.0,372.0,348.0,394.0,388.0
4,GDN0005,HoldingBreath,5,386.0,790.0,398.0,312.0,388.0,418.0,392.0,390.0,378.0,354.0,400.0,384.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11133,VP_032,Talk,39,310.0,619.0,335.0,276.0,324.0,300.0,318.0,306.0,305.0,294.0,337.0,306.0
11134,VP_032,Talk,40,322.0,680.0,331.0,298.0,330.0,311.0,322.0,311.0,321.0,303.0,329.0,287.0
11135,VP_032,Talk,41,340.0,719.0,317.0,300.0,348.0,330.0,343.0,309.0,332.0,322.0,311.0,287.0
11136,VP_032,Talk,42,311.0,698.0,365.0,324.0,366.0,347.0,354.0,305.0,351.0,337.0,368.0,305.0


In [26]:
# Per column: does the wide table contain at least one missing value?
input_data_wide.isna().any()

participant                        False
phase                              False
heartbeat_id_reference             False
b_point_sample_reference           False
rr_interval_ms_estimated           False
arbol2017-isoelectric-crossings     True
arbol2017-second-derivative        False
arbol2017-third-derivative          True
debski1993-second-derivative        True
drost2022                          False
forounzafar2018                     True
lozano2007-linear-regression       False
lozano2007-quadratic-regression    False
sherwood1990                        True
stern1985                           True
dtype: bool

In [38]:
# Identifying columns plus a hand-picked subset of algorithm columns, used to
# see how many rows survive a dropna on fewer algorithms.
identifier_columns = [
    "participant",
    "phase",
    "heartbeat_id_reference",
    "b_point_sample_reference",
    "rr_interval_ms_estimated",
]
algorithm_subset = [
    "drost2022",
    "forounzafar2018",
    "lozano2007-quadratic-regression",
    "lozano2007-linear-regression",
    "debski1993-second-derivative",
]
test_amount_nan = input_data_wide[identifier_columns + algorithm_subset]
test_amount_nan

Unnamed: 0,participant,phase,heartbeat_id_reference,b_point_sample_reference,rr_interval_ms_estimated,drost2022,forounzafar2018,lozano2007-quadratic-regression,lozano2007-linear-regression,debski1993-second-derivative
0,GDN0005,HoldingBreath,0,388.0,850.0,400.0,550.0,384.0,412.0,452.0
1,GDN0005,HoldingBreath,1,404.0,778.0,420.0,402.0,384.0,404.0,388.0
2,GDN0005,HoldingBreath,3,376.0,746.0,386.0,388.0,348.0,366.0,366.0
3,GDN0005,HoldingBreath,4,390.0,766.0,396.0,398.0,348.0,372.0,376.0
4,GDN0005,HoldingBreath,5,386.0,790.0,392.0,390.0,354.0,378.0,418.0
...,...,...,...,...,...,...,...,...,...,...
11133,VP_032,Talk,39,310.0,619.0,318.0,306.0,294.0,305.0,300.0
11134,VP_032,Talk,40,322.0,680.0,322.0,311.0,303.0,321.0,311.0
11135,VP_032,Talk,41,340.0,719.0,343.0,309.0,322.0,332.0,330.0
11136,VP_032,Talk,42,311.0,698.0,354.0,305.0,337.0,351.0,347.0


In [39]:
# Per column of the reduced table: is at least one value missing?
test_amount_nan.isna().any()

participant                        False
phase                              False
heartbeat_id_reference             False
b_point_sample_reference           False
rr_interval_ms_estimated           False
drost2022                          False
forounzafar2018                     True
lozano2007-quadratic-regression    False
lozano2007-linear-regression       False
debski1993-second-derivative        True
dtype: bool

In [40]:
# Preview of the reduced table without incomplete rows; the result is only
# displayed, not stored.
test_amount_nan.dropna()

Unnamed: 0,participant,phase,heartbeat_id_reference,b_point_sample_reference,rr_interval_ms_estimated,drost2022,forounzafar2018,lozano2007-quadratic-regression,lozano2007-linear-regression,debski1993-second-derivative
0,GDN0005,HoldingBreath,0,388.0,850.0,400.0,550.0,384.0,412.0,452.0
1,GDN0005,HoldingBreath,1,404.0,778.0,420.0,402.0,384.0,404.0,388.0
2,GDN0005,HoldingBreath,3,376.0,746.0,386.0,388.0,348.0,366.0,366.0
3,GDN0005,HoldingBreath,4,390.0,766.0,396.0,398.0,348.0,372.0,376.0
4,GDN0005,HoldingBreath,5,386.0,790.0,392.0,390.0,354.0,378.0,418.0
...,...,...,...,...,...,...,...,...,...,...
11133,VP_032,Talk,39,310.0,619.0,318.0,306.0,294.0,305.0,300.0
11134,VP_032,Talk,40,322.0,680.0,322.0,311.0,303.0,321.0,311.0
11135,VP_032,Talk,41,340.0,719.0,343.0,309.0,322.0,332.0,330.0
11136,VP_032,Talk,42,311.0,698.0,354.0,305.0,337.0,351.0,347.0


In [27]:
# Keep only rows that have a value in every column, i.e. for which every
# algorithm column is filled. This overwrites the wide table.
input_data_wide = input_data_wide.dropna()
input_data_wide

Unnamed: 0,participant,phase,heartbeat_id_reference,b_point_sample_reference,rr_interval_ms_estimated,arbol2017-isoelectric-crossings,arbol2017-second-derivative,arbol2017-third-derivative,debski1993-second-derivative,drost2022,forounzafar2018,lozano2007-linear-regression,lozano2007-quadratic-regression,sherwood1990,stern1985
0,GDN0005,HoldingBreath,0,388.0,850.0,438.0,398.0,394.0,452.0,400.0,550.0,412.0,384.0,442.0,388.0
1,GDN0005,HoldingBreath,1,404.0,778.0,340.0,350.0,244.0,388.0,420.0,402.0,404.0,384.0,330.0,402.0
2,GDN0005,HoldingBreath,3,376.0,746.0,382.0,296.0,386.0,366.0,386.0,388.0,366.0,348.0,382.0,374.0
3,GDN0005,HoldingBreath,4,390.0,766.0,394.0,344.0,396.0,376.0,396.0,398.0,372.0,348.0,394.0,388.0
4,GDN0005,HoldingBreath,5,386.0,790.0,398.0,312.0,388.0,418.0,392.0,390.0,378.0,354.0,400.0,384.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11133,VP_032,Talk,39,310.0,619.0,335.0,276.0,324.0,300.0,318.0,306.0,305.0,294.0,337.0,306.0
11134,VP_032,Talk,40,322.0,680.0,331.0,298.0,330.0,311.0,322.0,311.0,321.0,303.0,329.0,287.0
11135,VP_032,Talk,41,340.0,719.0,317.0,300.0,348.0,330.0,343.0,309.0,332.0,322.0,311.0,287.0
11136,VP_032,Talk,42,311.0,698.0,365.0,324.0,366.0,347.0,354.0,305.0,351.0,337.0,368.0,305.0


In [28]:
# Show the export switch right before the export cell.
print(f"Save results: {save_results}")

Save results: True


In [29]:
# Export the wide training table (including its row index) as CSV.
if save_results:
    output_file = result_path.joinpath("data/train_data_rr_interval_no_outlier_correction.csv")
    # ``to_csv`` does not create missing parent folders, so make sure the
    # target directory exists before writing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the file name always says "no_outlier_correction", even when
    # `drop_outlier_correction` is False — confirm whether the name should
    # depend on that switch.
    input_data_wide.to_csv(output_file)