In [1]:
import pandas as pd
from pathlib import Path
from pepbench.evaluation._error_metrics import error, abs_error, rel_error, abs_rel_error
from pepbench.data_handling._data_handling import describe_ml_results_df
from pepbench.export._latex import create_ml_algo_performance_table

In [2]:
# Fiducial point to evaluate; the cells below branch on "b-point" and "q-peak".
event = "b-point"

In [3]:
# Resolve the results directory that belongs to the selected event.
if event == 'b-point':
    data_path = Path("../../../results/data/b-point")
elif event == 'q-peak':
    data_path = Path("../../../results/data/q-peak")
else:
    # Fail fast: merely printing a hint would leave `data_path` undefined and
    # surface later as an unrelated NameError in the loading cell.
    raise ValueError(f"Unknown event {event!r}. Select either b-point or q-peak!")

In [4]:
# Pick the merged results file that belongs to the selected event.
if event == 'b-point':
    results_file = "merged_ml_results_b_point_paper_rr_only.csv"
elif event == 'q-peak':
    results_file = "merged_ml_results_q_peak.csv"
else:
    # Fail fast: merely printing a hint would leave `merged_ml_results`
    # undefined (or stale from a previous run) for every cell below.
    raise ValueError(f"Unknown event {event!r}. Select either b-point or q-peak!")

# The first four CSV columns form the row index
# (participant, condition, phase, heartbeat_id_reference).
merged_ml_results = pd.read_csv(data_path.joinpath(results_file), index_col=[0, 1, 2, 3])
merged_ml_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,b_point_sample_reference,arbol2017-isoelectric-crossings,arbol2017-second-derivative,arbol2017-third-derivative,debski1993-second-derivative,drost2022,forouzanfar2018,lozano2007-linear-regression,lozano2007-quadratic-regression,miljkovic2022,...,SS_KBest_SVR_RR-Interval-Median-Imputed,SS_KBest_KNR_RR-Interval-Median-Imputed,MinMax_SFM_DTR_RR-Interval-Median-Imputed,MinMax_SFM_RFR_RR-Interval-Median-Imputed,MinMax_SFM_SVR_RR-Interval-Median-Imputed,MinMax_SFM_KNR_RR-Interval-Median-Imputed,MinMax_KBest_DTR_RR-Interval-Median-Imputed,MinMax_KBest_RFR_RR-Interval-Median-Imputed,MinMax_KBest_SVR_RR-Interval-Median-Imputed,MinMax_KBest_KNR_RR-Interval-Median-Imputed
participant,condition,phase,heartbeat_id_reference,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
GDN0005,Dummy,HoldingBreath,0,388.0,438.0,398.0,394.0,452.0,400.0,550.0,412.0,384.0,442.0,...,412.398584,421.288455,392.0,430.592322,415.841272,416.688480,403.0,426.637331,417.722668,406.476710
GDN0005,Dummy,HoldingBreath,1,404.0,340.0,350.0,244.0,388.0,420.0,402.0,404.0,384.0,288.0,...,403.408903,401.485786,406.0,404.075637,405.752970,396.160236,404.0,403.465366,407.185039,401.542367
GDN0005,Dummy,HoldingBreath,3,376.0,382.0,296.0,386.0,366.0,386.0,388.0,366.0,348.0,386.0,...,376.768489,376.403625,376.0,375.332204,377.956802,377.268411,376.0,374.791562,379.388331,376.412492
GDN0005,Dummy,HoldingBreath,4,390.0,394.0,344.0,396.0,376.0,396.0,398.0,372.0,348.0,398.0,...,388.625256,388.643213,388.0,389.011683,388.530034,387.233813,388.0,389.433662,388.807399,388.685471
GDN0005,Dummy,HoldingBreath,5,386.0,398.0,312.0,388.0,418.0,392.0,390.0,378.0,354.0,392.0,...,385.761830,386.743597,384.0,385.868359,386.934831,390.040129,386.0,386.062038,388.201816,386.704448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VP_032,tsst,Talk,39,310.0,335.0,276.0,324.0,300.0,318.0,306.0,305.0,294.0,308.0,...,309.424969,319.918637,311.0,310.134872,308.018902,310.277235,311.0,310.595307,308.353270,319.951963
VP_032,tsst,Talk,40,322.0,331.0,298.0,330.0,311.0,322.0,311.0,321.0,303.0,329.0,...,312.280534,317.460122,321.0,317.331053,310.554869,315.145165,323.0,315.881624,311.007609,317.834377
VP_032,tsst,Talk,41,340.0,317.0,300.0,348.0,330.0,343.0,309.0,332.0,322.0,292.0,...,331.079043,329.227094,334.0,330.211990,330.103150,329.741920,334.0,325.627975,329.914521,329.172779
VP_032,tsst,Talk,42,311.0,365.0,324.0,366.0,347.0,354.0,305.0,351.0,337.0,312.0,...,342.187956,339.654610,351.5,335.669733,339.014104,342.487887,350.0,333.193183,339.168398,339.668703


In [5]:
# Name of the column holding the manually annotated reference for the selected event.
if event == 'b-point':
    reference_column = 'b_point_sample_reference'
elif event == 'q-peak':
    reference_column = 'q_peak_sample_reference'
else:
    # Fail fast: merely printing a hint would leave `ref_data` / `iterable`
    # undefined and crash a few lines further down with a NameError.
    raise ValueError(f"Unknown event {event!r}. Select either b-point or q-peak!")

ref_data = merged_ml_results[reference_column]
# Every remaining column is one algorithm's estimate; the reference and the
# RR-interval column are not estimates and are excluded.
iterable = merged_ml_results.drop(columns=[reference_column, 'rr_interval_ms_estimated'])

# Column suffix -> metric function. The insertion order defines the column
# order per algorithm: error, rel_error, abs_error, abs_rel_error.
metric_functions = {
    'error': error,
    'rel_error': rel_error,
    'abs_error': abs_error,
    'abs_rel_error': abs_rel_error,
}

# Collect all metric columns first and concatenate once, instead of growing a
# DataFrame column by column.
new_columns_list = []
for column in iterable.columns:
    for suffix, metric_function in metric_functions.items():
        metric_values = metric_function(ref_data=ref_data, est_data=iterable[column])
        new_columns_list.append(metric_values.rename(f"{column}_{suffix}"))

merged_ml_results_metrics = pd.concat(new_columns_list, axis=1)

In [6]:
# Inspect the per-heartbeat metrics: four columns per algorithm
# (`_error`, `_rel_error`, `_abs_error`, `_abs_rel_error`).
merged_ml_results_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,arbol2017-isoelectric-crossings_error,arbol2017-isoelectric-crossings_rel_error,arbol2017-isoelectric-crossings_abs_error,arbol2017-isoelectric-crossings_abs_rel_error,arbol2017-second-derivative_error,arbol2017-second-derivative_rel_error,arbol2017-second-derivative_abs_error,arbol2017-second-derivative_abs_rel_error,arbol2017-third-derivative_error,arbol2017-third-derivative_rel_error,...,MinMax_KBest_RFR_RR-Interval-Median-Imputed_abs_error,MinMax_KBest_RFR_RR-Interval-Median-Imputed_abs_rel_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_rel_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_abs_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_abs_rel_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_rel_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_abs_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_abs_rel_error
participant,condition,phase,heartbeat_id_reference,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
GDN0005,Dummy,HoldingBreath,0,-50.0,-0.128866,50.0,0.128866,-10.0,-0.025773,10.0,0.025773,-6.0,-0.015464,...,38.637331,0.099581,-29.722668,-0.076605,29.722668,0.076605,-18.476710,-0.047620,18.476710,0.047620
GDN0005,Dummy,HoldingBreath,1,64.0,0.158416,64.0,0.158416,54.0,0.133663,54.0,0.133663,160.0,0.396040,...,0.534634,0.001323,-3.185039,-0.007884,3.185039,0.007884,2.457633,0.006083,2.457633,0.006083
GDN0005,Dummy,HoldingBreath,3,-6.0,-0.015957,6.0,0.015957,80.0,0.212766,80.0,0.212766,-10.0,-0.026596,...,1.208438,0.003214,-3.388331,-0.009012,3.388331,0.009012,-0.412492,-0.001097,0.412492,0.001097
GDN0005,Dummy,HoldingBreath,4,-4.0,-0.010256,4.0,0.010256,46.0,0.117949,46.0,0.117949,-6.0,-0.015385,...,0.566338,0.001452,1.192601,0.003058,1.192601,0.003058,1.314529,0.003371,1.314529,0.003371
GDN0005,Dummy,HoldingBreath,5,-12.0,-0.031088,12.0,0.031088,74.0,0.191710,74.0,0.191710,-2.0,-0.005181,...,0.062038,0.000161,-2.201816,-0.005704,2.201816,0.005704,-0.704448,-0.001825,0.704448,0.001825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VP_032,tsst,Talk,39,-25.0,-0.080645,25.0,0.080645,34.0,0.109677,34.0,0.109677,-14.0,-0.045161,...,0.595307,0.001920,1.646730,0.005312,1.646730,0.005312,-9.951963,-0.032103,9.951963,0.032103
VP_032,tsst,Talk,40,-9.0,-0.027950,9.0,0.027950,24.0,0.074534,24.0,0.074534,-8.0,-0.024845,...,6.118376,0.019001,10.992391,0.034138,10.992391,0.034138,4.165623,0.012937,4.165623,0.012937
VP_032,tsst,Talk,41,23.0,0.067647,23.0,0.067647,40.0,0.117647,40.0,0.117647,-8.0,-0.023529,...,14.372025,0.042271,10.085479,0.029663,10.085479,0.029663,10.827221,0.031845,10.827221,0.031845
VP_032,tsst,Talk,42,-54.0,-0.173633,54.0,0.173633,-13.0,-0.041801,13.0,0.041801,-55.0,-0.176849,...,22.193183,0.071361,-28.168398,-0.090574,28.168398,0.090574,-28.668703,-0.092182,28.668703,0.092182


#### Save the dataframe containing the error metrics

In [7]:
# Optional export (disabled): uncomment to write the per-heartbeat error
# metrics to a CSV file inside `data_path`.
# if event == 'b-point':
#     merged_ml_results_metrics.to_csv(data_path.joinpath("merged_ml_results_b_point_error_metrics_rr_only.csv"))
# elif event == 'q-peak':
#     merged_ml_results_metrics.to_csv(data_path.joinpath("merged_ml_results_q_peak_error_metrics.csv"))
# else:
#     print("Select either b-point or q-peak!")

### Compute MAE, MARE, ME, and MRE

In [8]:
# Aggregate the per-heartbeat metrics into one row per algorithm. The displayed
# result has a two-level column index: (mean | std) x (MAE, MARE, ME, MRE).
# NOTE(review): the rows appear to be sorted by ascending mean MAE — confirm
# against `describe_ml_results_df`.
summarized_error_metrics = describe_ml_results_df(merged_ml_results_metrics)
summarized_error_metrics

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,std
metric_type,MAE,MARE,ME,MRE,MAE,MARE,ME,MRE
B-Point Algorithm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
MinMax_None_RFR_RR-Interval-Include-Nan,8.127195,2.4577,-0.03429,-0.221008,12.119868,3.924358,14.592711,4.625209
SS_None_RFR_RR-Interval-Include-Nan,8.176128,2.47075,-0.016219,-0.219021,12.190455,3.948903,14.678622,4.653065
MinMax_None_RFR_Without-RR-Interval-Include-Nan,8.260385,2.516612,-0.102849,-0.277577,13.029197,4.453063,15.426906,5.107506
SS_SFM_RFR_RR-Interval-Median-Imputed,8.30337,2.510506,-0.032807,-0.232501,12.295232,3.921546,14.836569,4.650555
SS_None_RFR_Without-RR-Interval-Include-Nan,8.316623,2.538004,-0.122022,-0.290668,13.04429,4.462719,15.46968,5.125759
MinMax_KBest_RFR_RR-Interval-Median-Imputed,8.332127,2.525267,0.000975,-0.215482,12.295956,3.932469,14.85332,4.668556
MinMax_SFM_RFR_RR-Interval-Median-Imputed,8.404488,2.552079,-0.058733,-0.238909,12.364311,3.967338,14.950401,4.711304
SS_KBest_RFR_RR-Interval-Median-Imputed,8.417848,2.551505,-0.061609,-0.240501,12.597694,3.991527,15.15139,4.731301
SS_KBest_SVR_RR-Interval-Median-Imputed,8.47152,2.590958,-0.197264,-0.296733,15.932213,5.420518,18.043547,6.000636
MinMax_SFM_DTR_RR-Interval-Median-Imputed,8.625426,2.634158,-0.085204,-0.205887,14.691449,4.597049,17.036316,5.294325


In [9]:
# Show the two-level column index of the summary table
# (unnamed aggregation level, then `metric_type`).
summarized_error_metrics.columns

MultiIndex([('mean',  'MAE'),
            ('mean', 'MARE'),
            ('mean',   'ME'),
            ('mean',  'MRE'),
            ( 'std',  'MAE'),
            ( 'std', 'MARE'),
            ( 'std',   'ME'),
            ( 'std',  'MRE')],
           names=[None, 'metric_type'])

In [10]:
# Optional export (disabled): uncomment to write the per-algorithm summary
# table to a CSV file inside `data_path`.
# if event == 'b-point':
#     summarized_error_metrics.to_csv(data_path.joinpath("summarized_b_point_error_metrics_rr_only.csv"))
# elif event == 'q-peak':
#     summarized_error_metrics.to_csv(data_path.joinpath("summarized_q_peak_error_metrics.csv"))
# else:
#     print("Select either b-point or q-peak!")

In [14]:
# Build the table for the paper; the displayed cells are formatted as "mean ± std".
# NOTE(review): `n_algos=40` presumably caps the number of algorithms included
# in the table — confirm in `create_ml_algo_performance_table`.
metric_export_paper = create_ml_algo_performance_table(summarized_error_metrics, n_algos=40)
metric_export_paper

Unnamed: 0_level_0,MAE,MARE,ME,MRE
B-Point Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MinMax_None_RFR_RR-Interval-Include-Nan,8.13 ± 12.12,2.46 ± 3.92,-0.03 ± 14.59,-0.22 ± 4.63
SS_None_RFR_RR-Interval-Include-Nan,8.18 ± 12.19,2.47 ± 3.95,-0.02 ± 14.68,-0.22 ± 4.65
MinMax_None_RFR_Without-RR-Interval-Include-Nan,8.26 ± 13.03,2.52 ± 4.45,-0.1 ± 15.43,-0.28 ± 5.11
SS_SFM_RFR_RR-Interval-Median-Imputed,8.3 ± 12.3,2.51 ± 3.92,-0.03 ± 14.84,-0.23 ± 4.65
SS_None_RFR_Without-RR-Interval-Include-Nan,8.32 ± 13.04,2.54 ± 4.46,-0.12 ± 15.47,-0.29 ± 5.13
MinMax_KBest_RFR_RR-Interval-Median-Imputed,8.33 ± 12.3,2.53 ± 3.93,0.0 ± 14.85,-0.22 ± 4.67
MinMax_SFM_RFR_RR-Interval-Median-Imputed,8.4 ± 12.36,2.55 ± 3.97,-0.06 ± 14.95,-0.24 ± 4.71
SS_KBest_RFR_RR-Interval-Median-Imputed,8.42 ± 12.6,2.55 ± 3.99,-0.06 ± 15.15,-0.24 ± 4.73
SS_KBest_SVR_RR-Interval-Median-Imputed,8.47 ± 15.93,2.59 ± 5.42,-0.2 ± 18.04,-0.3 ± 6.0
MinMax_SFM_DTR_RR-Interval-Median-Imputed,8.63 ± 14.69,2.63 ± 4.6,-0.09 ± 17.04,-0.21 ± 5.29


Mean performance of models per data imputation strategy

In [84]:
# Alternative selection (disabled):
# datasets = ['RR-Interval', 'RR-Interval-Median-Imputed', 'RR-Interval-Include-Nan', 'Without-RR-Interval', 'Without-RR-Interval-Median-Imputed', 'Without-RR-Interval-Include-Nan']
datasets = ['RR-Interval-Include-Nan', 'RR-Interval-Median-Imputed']

# Average the summary rows of all models whose name ends with the dataset suffix.
# The leading underscore in the suffix keeps 'RR-Interval-...' from also
# matching the 'Without-RR-Interval-...' models.
# Note: the `std` columns are therefore the mean of the per-model standard
# deviations, not a pooled standard deviation.
per_dataset_means = {}
for dataset in datasets:
    belongs_to_dataset = summarized_error_metrics.index.str.endswith(f'_{dataset}')
    per_dataset_means[dataset] = summarized_error_metrics[belongs_to_dataset].mean(axis=0)

# One row per dataset, one column per (aggregation, metric) pair.
dataset_results = pd.DataFrame(per_dataset_means).T
dataset_results

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,std
metric_type,MAE,MARE,ME,MRE,MAE,MARE,ME,MRE
RR-Interval-Include-Nan,9.135756,2.749951,-0.030876,-0.227156,13.094406,4.230232,15.969036,5.041271
RR-Interval-Median-Imputed,8.925654,2.700109,-0.045767,-0.220022,14.030059,4.45696,16.641267,5.21038


In [85]:
# Render the per-dataset averages with the same table helper used for the
# per-algorithm summary.
create_ml_algo_performance_table(dataset_results)

Unnamed: 0,MAE,MARE,ME,MRE
RR-Interval-Median-Imputed,8.9 ± 14.0,2.7 ± 4.5,-0.0 ± 16.6,-0.2 ± 5.2
RR-Interval-Include-Nan,9.1 ± 13.1,2.7 ± 4.2,-0.0 ± 16.0,-0.2 ± 5.0


### Compute MAE, MARE, ME, and MRE per data source (Empkins, Guardian)

In [61]:
# Split the metrics by data source: rows with condition 'Dummy' go to the
# Guardian subset, all other conditions to the Empkins subset. Both subsets
# end up without the `condition` index level.
condition_labels = merged_ml_results_metrics.index.get_level_values('condition')
is_guardian_row = condition_labels == 'Dummy'

empkins_data = merged_ml_results_metrics.loc[~is_guardian_row].droplevel('condition')
guardian_data = merged_ml_results_metrics.xs('Dummy', level='condition')

In [62]:
# Summarise and tabulate the error metrics for the Empkins subset only.
# NOTE(review): `n_algos=15` presumably caps the number of algorithms in the
# table — confirm in `create_ml_algo_performance_table`.
summary_metrics_empkins = describe_ml_results_df(empkins_data)
metric_export_paper_empkins = create_ml_algo_performance_table(summary_metrics_empkins, n_algos=15)
metric_export_paper_empkins

Unnamed: 0_level_0,MAE,MARE,ME,MRE
B-Point Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SS_SFM_SVR_RR-Interval,7.7 ± 11.9,2.9 ± 4.7,-1.1 ± 14.1,-0.6 ± 5.5
MinMax_KBest_RFR_RR-Interval,7.7 ± 11.0,2.8 ± 4.1,0.3 ± 13.5,-0.1 ± 5.0
SS_SFM_RFR_RR-Interval,7.8 ± 10.8,2.8 ± 4.0,0.4 ± 13.3,-0.1 ± 4.9
SS_KBest_RFR_RR-Interval,7.8 ± 11.1,2.8 ± 4.2,0.4 ± 13.6,-0.1 ± 5.0
MinMax_SFM_RFR_RR-Interval,7.8 ± 10.9,2.8 ± 4.1,0.3 ± 13.4,-0.1 ± 5.0
SS_SFM_SVR_Without-RR-Interval,7.9 ± 12.3,2.9 ± 4.9,-0.9 ± 14.6,-0.6 ± 5.6
SS_KBest_RFR_Without-RR-Interval,7.9 ± 11.2,2.9 ± 4.1,0.4 ± 13.7,-0.1 ± 5.0
MinMax_KBest_RFR_Without-RR-Interval,7.9 ± 11.2,2.9 ± 4.1,0.5 ± 13.7,-0.1 ± 5.0
SS_SFM_RFR_Without-RR-Interval,8.0 ± 11.1,2.9 ± 4.1,0.5 ± 13.7,-0.1 ± 5.0
SS_KBest_SVR_RR-Interval,8.0 ± 13.7,2.9 ± 5.1,0.3 ± 15.8,-0.2 ± 5.9


In [63]:
# Summarise and tabulate the error metrics for the Guardian subset only.
# NOTE(review): `n_algos=15` presumably caps the number of algorithms in the
# table — confirm in `create_ml_algo_performance_table`.
summary_metrics_guardian = describe_ml_results_df(guardian_data)
metric_export_paper_guardian = create_ml_algo_performance_table(summary_metrics_guardian, n_algos=15)
metric_export_paper_guardian

Unnamed: 0_level_0,MAE,MARE,ME,MRE
B-Point Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SS_KBest_SVR_RR-Interval,7.6 ± 13.4,1.8 ± 3.4,-0.9 ± 15.4,-0.4 ± 3.9
SS_KBest_SVR_Without-RR-Interval,7.6 ± 13.3,1.8 ± 3.4,-0.9 ± 15.3,-0.4 ± 3.9
MinMax_KBest_RFR_Without-RR-Interval,7.7 ± 12.1,1.9 ± 3.2,-0.3 ± 14.3,-0.2 ± 3.7
MinMax_SFM_RFR_RR-Interval,7.8 ± 12.0,1.9 ± 3.1,0.0 ± 14.3,-0.1 ± 3.6
MinMax_KBest_RFR_RR-Interval,7.8 ± 11.9,1.9 ± 3.1,-0.1 ± 14.2,-0.2 ± 3.6
SS_KBest_RFR_Without-RR-Interval,7.8 ± 11.8,1.9 ± 3.1,-0.4 ± 14.2,-0.2 ± 3.6
SS_SFM_RFR_RR-Interval,7.8 ± 11.8,1.9 ± 3.1,-0.1 ± 14.2,-0.2 ± 3.6
SS_SFM_SVR_Without-RR-Interval,7.8 ± 13.0,1.9 ± 3.4,-0.7 ± 15.2,-0.3 ± 3.8
SS_SFM_RFR_Without-RR-Interval,7.8 ± 12.0,1.9 ± 3.2,-0.2 ± 14.3,-0.2 ± 3.7
SS_KBest_RFR_RR-Interval,7.8 ± 12.0,1.9 ± 3.1,-0.1 ± 14.4,-0.2 ± 3.7


Evaluate the performance of the best models that are based on one of the following data imputation strategies:
- include missing values
- impute missing values (median)

This is performed by comparing the individual model performance on the following subsets of data:
- no missing values occur (= dataset used to train the best performing algorithm)
- at least one feature contains missing values

In [64]:
# Re-display the per-heartbeat error metrics before picking individual
# algorithm columns from them.
merged_ml_results_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,arbol2017-isoelectric-crossings_error,arbol2017-isoelectric-crossings_rel_error,arbol2017-isoelectric-crossings_abs_error,arbol2017-isoelectric-crossings_abs_rel_error,arbol2017-second-derivative_error,arbol2017-second-derivative_rel_error,arbol2017-second-derivative_abs_error,arbol2017-second-derivative_abs_rel_error,arbol2017-third-derivative_error,arbol2017-third-derivative_rel_error,...,MinMax_KBest_RFR_RR-Interval-Median-Imputed_abs_error,MinMax_KBest_RFR_RR-Interval-Median-Imputed_abs_rel_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_rel_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_abs_error,MinMax_KBest_SVR_RR-Interval-Median-Imputed_abs_rel_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_rel_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_abs_error,MinMax_KBest_KNR_RR-Interval-Median-Imputed_abs_rel_error
participant,condition,phase,heartbeat_id_reference,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
GDN0005,Dummy,HoldingBreath,0,-50.0,-0.128866,50.0,0.128866,-10.0,-0.025773,10.0,0.025773,-6.0,-0.015464,...,38.637331,0.099581,-29.722668,-0.076605,29.722668,0.076605,-18.476710,-0.047620,18.476710,0.047620
GDN0005,Dummy,HoldingBreath,1,64.0,0.158416,64.0,0.158416,54.0,0.133663,54.0,0.133663,160.0,0.396040,...,0.534634,0.001323,-3.185039,-0.007884,3.185039,0.007884,2.457633,0.006083,2.457633,0.006083
GDN0005,Dummy,HoldingBreath,3,-6.0,-0.015957,6.0,0.015957,80.0,0.212766,80.0,0.212766,-10.0,-0.026596,...,1.208438,0.003214,-3.388331,-0.009012,3.388331,0.009012,-0.412492,-0.001097,0.412492,0.001097
GDN0005,Dummy,HoldingBreath,4,-4.0,-0.010256,4.0,0.010256,46.0,0.117949,46.0,0.117949,-6.0,-0.015385,...,0.566338,0.001452,1.192601,0.003058,1.192601,0.003058,1.314529,0.003371,1.314529,0.003371
GDN0005,Dummy,HoldingBreath,5,-12.0,-0.031088,12.0,0.031088,74.0,0.191710,74.0,0.191710,-2.0,-0.005181,...,0.062038,0.000161,-2.201816,-0.005704,2.201816,0.005704,-0.704448,-0.001825,0.704448,0.001825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VP_032,tsst,Talk,39,-25.0,-0.080645,25.0,0.080645,34.0,0.109677,34.0,0.109677,-14.0,-0.045161,...,0.595307,0.001920,1.646730,0.005312,1.646730,0.005312,-9.951963,-0.032103,9.951963,0.032103
VP_032,tsst,Talk,40,-9.0,-0.027950,9.0,0.027950,24.0,0.074534,24.0,0.074534,-8.0,-0.024845,...,6.118376,0.019001,10.992391,0.034138,10.992391,0.034138,4.165623,0.012937,4.165623,0.012937
VP_032,tsst,Talk,41,23.0,0.067647,23.0,0.067647,40.0,0.117647,40.0,0.117647,-8.0,-0.023529,...,14.372025,0.042271,10.085479,0.029663,10.085479,0.029663,10.827221,0.031845,10.827221,0.031845
VP_032,tsst,Talk,42,-54.0,-0.173633,54.0,0.173633,-13.0,-0.041801,13.0,0.041801,-55.0,-0.176849,...,22.193183,0.071361,-28.168398,-0.090574,28.168398,0.090574,-28.668703,-0.092182,28.668703,0.092182


In [71]:
# Models selected as best per missing-value strategy. The names are the column
# prefixes used in `merged_ml_results_metrics`.
best_algo_drop_nan = "MinMax_KBest_RFR_RR-Interval"
best_algo_include_nan = "MinMax_None_RFR_RR-Interval-Include-Nan"
best_algo_median_imputed = "SS_SFM_RFR_RR-Interval-Median-Imputed"

In [73]:
# Absolute-error columns of the three selected models, in the order
# drop-NaN, include-NaN, median-imputed.
# To compare only the two imputation strategies, remove `best_algo_drop_nan`
# from the tuple (the subset cells below rely on that column, though).
selected_algos = (best_algo_drop_nan, best_algo_include_nan, best_algo_median_imputed)
abs_error_columns = [f'{algo_name}_abs_error' for algo_name in selected_algos]

best_algos_performance = merged_ml_results_metrics[abs_error_columns]
best_algos_performance

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,MinMax_KBest_RFR_RR-Interval_abs_error,MinMax_None_RFR_RR-Interval-Include-Nan_abs_error,SS_SFM_RFR_RR-Interval-Median-Imputed_abs_error
participant,condition,phase,heartbeat_id_reference,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GDN0005,Dummy,HoldingBreath,0,32.550529,31.768000,35.714513
GDN0005,Dummy,HoldingBreath,1,0.843986,0.648000,0.420155
GDN0005,Dummy,HoldingBreath,3,1.572993,0.256000,0.374530
GDN0005,Dummy,HoldingBreath,4,0.768750,1.293600,1.300286
GDN0005,Dummy,HoldingBreath,5,0.555262,0.088444,0.484113
...,...,...,...,...,...,...
VP_032,tsst,Talk,39,0.038733,0.040000,0.415910
VP_032,tsst,Talk,40,9.043615,5.660000,4.582487
VP_032,tsst,Talk,41,16.989542,15.168000,8.382698
VP_032,tsst,Talk,42,27.518967,26.744000,24.837445


In [94]:
# Summary statistics of the absolute error per selected model over all
# heartbeats. `count` ignores NaN entries, so it can differ between models.
best_algos_performance.describe()

Unnamed: 0,MinMax_KBest_RFR_RR-Interval_abs_error,MinMax_None_RFR_RR-Interval-Include-Nan_abs_error,SS_SFM_RFR_RR-Interval-Median-Imputed_abs_error
count,10305.0,11239.0,11138.0
mean,7.76012,8.127195,8.30337
std,11.555173,12.119868,12.295232
min,0.000159,0.000906,0.000146
25%,1.216281,1.263296,1.305405
50%,3.29673,3.485674,3.620863
75%,9.557387,10.086356,10.201339
max,178.464573,189.486654,186.877517


Performance on the subset of data without missing values

In [75]:
# Heartbeats for which the drop-NaN model has an error value.
has_drop_nan_error = best_algos_performance[f'{best_algo_drop_nan}_abs_error'].notna()
best_algos_performance[has_drop_nan_error].describe()

Unnamed: 0,MinMax_KBest_RFR_RR-Interval_abs_error,MinMax_None_RFR_RR-Interval-Include-Nan_abs_error,SS_SFM_RFR_RR-Interval-Median-Imputed_abs_error
count,10305.0,10305.0,10305.0
mean,7.76012,7.757768,7.897836
std,11.555173,11.424921,11.621399
min,0.000159,0.000906,0.000146
25%,1.216281,1.212388,1.256956
50%,3.29673,3.260708,3.43981
75%,9.557387,9.597985,9.63388
max,178.464573,176.504471,186.877517


Performance on the subset of data with missing values

In [76]:
# Heartbeats for which the drop-NaN model has no error value.
lacks_drop_nan_error = best_algos_performance[f'{best_algo_drop_nan}_abs_error'].isna()
best_algos_performance[lacks_drop_nan_error].describe()

Unnamed: 0,MinMax_KBest_RFR_RR-Interval_abs_error,MinMax_None_RFR_RR-Interval-Include-Nan_abs_error,SS_SFM_RFR_RR-Interval-Median-Imputed_abs_error
count,0.0,934.0,833.0
mean,,12.203157,13.320206
std,,17.595886,17.991847
min,,0.004,0.013289
25%,,2.441908,2.5607
50%,,6.357265,7.484738
75%,,14.737172,17.560757
max,,189.486654,186.325302
