In [15]:
# Data handling
import numpy as np
import pandas as pd
from IPython.display import display

from sklearn.metrics import (precision_score, recall_score, classification_report, accuracy_score, hamming_loss, f1_score, fbeta_score,
    jaccard_score,
    roc_auc_score,
    average_precision_score,
    matthews_corrcoef,
    cohen_kappa_score
)


# Signal processing
import scipy.signal as signal

# Feature extraction
import neurokit2 as nk     # for ECG feature extraction (heart rate, peaks, etc.)

# Visualization
import matplotlib.pyplot as plt
import json

import matplotlib.pyplot as plt
from challenge import load_header, get_labels

In [16]:

# Diagnosis codes handled by this notebook, one per line. The trailing comment
# on each line is the abbreviation stored at the same position in
# `classes_labels` (presumably SNOMED CT codes — cf. the `SNOMEDCTCode` column
# of the dx_mapping CSV files; confirm).
classes = [
    '6374002',    # BBB
    '10370003',   # PR
    '17338001',   # PVC|VPB
    '39732003',   # LAD
    '47665007',   # RAD
    '59118001',   # RBBB|CRBBB
    '59931005',   # TInv
    '111975006',  # LQT
    '164889003',  # AF
    '164890007',  # AFL
    '164909002',  # CLBBB|LBBB
    '164917005',  # QAb
    '164934002',  # TAb
    '164947007',  # LPR
    '251146004',  # LQRSV
    '270492004',  # IAVB
    '284470004',  # PAC
    '365413008',  # PRWP
    '426177001',  # SB
    '426627000',  # Brady
    '426783006',  # NSR
    '427084000',  # STach
    '427393009',  # SA
    '445118002',  # LAnFB
    '698252002',  # NSIVCB
    '713426002',  # IRBBB
]

# Abbreviations, positionally aligned with `classes` (entry i names classes[i]).
classes_labels = [
    'BBB',
    'PR',
    'PVC|VPB',
    'LAD',
    'RAD',
    'RBBB|CRBBB',
    'TInv',
    'LQT',
    'AF',
    'AFL',
    'CLBBB|LBBB',
    'QAb',
    'TAb',
    'LPR',
    'LQRSV',
    'IAVB',
    'PAC',
    'PRWP',
    'SB',
    'Brady',
    'NSR',
    'STach',
    'SA',
    'LAnFB',
    'NSIVCB',
    'IRBBB',
]


def clean_labels(header):
    """Read a header's diagnosis codes and encode them against ``classes``.

    The codes come from ``get_labels(header)``. Four codes are rewritten to the
    entry of ``classes`` they are folded into, and duplicates are removed while
    keeping first-seen order, so the returned list is deterministic.

    Args:
        header: Header content as accepted by ``get_labels``.

    Returns:
        tuple: ``(codes, local_label)`` where ``codes`` is the list of
        remapped, de-duplicated code strings and ``local_label`` is a boolean
        NumPy vector of length ``len(classes)`` with ``True`` at the position
        of every code that appears in ``classes``. Codes that are not in
        ``classes`` stay in ``codes`` but set no bit.
    """
    # Codes on the left are replaced by the `classes` entry on the right.
    equivalent_codes = {
        '733534002': '164909002',
        '713427006': '59118001',
        '63593006': '284470004',
        '427172004': '17338001',
    }

    # dict.fromkeys drops duplicates but, unlike set(), keeps insertion order,
    # so the result does not vary with string hash randomisation.
    classes_from_header = list(dict.fromkeys(
        equivalent_codes.get(code, code) for code in get_labels(header)
    ))

    # Position lookup built once instead of `classes.index` per label.
    class_position = {code: position for position, code in enumerate(classes)}

    local_label = np.zeros((len(classes),), dtype=bool)
    for label in classes_from_header:
        position = class_position.get(label)
        if position is not None:
            local_label[position] = True

    return classes_from_header, local_label


In [17]:
# Peek at one fold's result file: its top-level keys, the number of label
# rows, and the first label row.
with open("results/ALPHA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-12/fold_0.json", "r") as fold_file:
    results = json.load(fold_file)

print(
    results.keys(),
    len(results["labels"]),
    results["labels"][0],
    sep="\n",
)

dict_keys(['c', 'labels', 'binary_outputs', 'scalar_outputs', 'times', 'auroc', 'auprc', 'auroc_classes', 'auprc_classes', 'f_measure', 'f_measure_classes', 'challenge_metric', 'leads', 'fold', 'experiment', 'network', 'accuracy'])
15021
[False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, True, False]


In [18]:
# Index file listing the header and recording files that belong to the test split.
test_filename = "h5_datasets/3s_full/cinc_database_test_('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6')_0.h5_header_recording_files.json"
with open(test_filename, 'r') as index_file:
    loaded_dict = json.load(index_file)

# Both lists are printed so their lengths can be compared by eye.
for list_name in ('header_files', 'recording_files'):
    print(len(loaded_dict[list_name]))

# Decode the labels of the first header as a spot check of clean_labels.
first_header_path = loaded_dict['header_files'][0]
header = load_header(first_header_path)
current_labels, labels_bin = clean_labels(header)
print(current_labels, labels_bin, sep="\n")

15021
15021
['164889003', '251146004']
[False False False False False False False False  True False False False
 False False  True False False False False False False False False False
 False False]


In [19]:
def load_info_for_single_fold(fold: int, experiment_results_path: str):
    """Load the JSON results stored for one fold of an experiment.

    Args:
        fold: Fold number; selects the file ``fold_<fold>.json``.
        experiment_results_path: Directory that contains the per-fold files.

    Returns:
        The object decoded from the JSON file.
    """
    fold_file_path = f"{experiment_results_path}/fold_{fold}.json"
    with open(fold_file_path, "r") as fold_file:
        return json.load(fold_file)
                
    

In [20]:
# Load fold 0 of the ALPHA_ONLY run and show which fields its result file holds.
single_fold_full_results = load_info_for_single_fold(
    fold=0,
    experiment_results_path="results/ALPHA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-12",
)
single_fold_full_results.keys()


dict_keys(['c', 'labels', 'binary_outputs', 'scalar_outputs', 'times', 'auroc', 'auprc', 'auroc_classes', 'auprc_classes', 'f_measure', 'f_measure_classes', 'challenge_metric', 'leads', 'fold', 'experiment', 'network', 'accuracy'])

In [21]:
def calculate_all_measures(y_true, y_pred, class_names, y_score=None):
    """Build a per-class table of multi-label classification metrics.

    Args:
        y_true: 2-D array-like (samples x classes) of ground-truth indicators.
        y_pred: 2-D array-like (samples x classes) of predicted indicators.
        class_names: Row labels for the resulting table, one per class column.
        y_score: Optional 2-D array-like (samples x classes) of continuous
            scores used for the ranking metrics ('ROC AUC' and
            'Avg Precision AUC'). When omitted, those two metrics are computed
            from ``y_pred``, as before; with hard 0/1 predictions they then
            describe a single operating point, not a full ranking.

    Returns:
        pandas.DataFrame indexed by ``class_names`` with the columns
        'Precision', 'Recall', 'F1-Score', 'F2-Score', 'Jaccard (IoU)',
        'ROC AUC', 'Avg Precision AUC', 'MCC' and 'Support'.
    """
    # Convert once up front; the per-column MCC and the support count below
    # need array indexing anyway.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    ranking_input = y_pred if y_score is None else np.asarray(y_score)

    # average=None makes every scorer return one value per class column.
    precision = precision_score(y_true, y_pred, average=None)
    recall = recall_score(y_true, y_pred, average=None)
    f1 = f1_score(y_true, y_pred, average=None)
    f2 = fbeta_score(y_true, y_pred, beta=2, average=None)
    jaccard = jaccard_score(y_true, y_pred, average=None)
    roc_auc = roc_auc_score(y_true, ranking_input, average=None)
    avg_prec = average_precision_score(y_true, ranking_input, average=None)

    # matthews_corrcoef is called on one class column at a time.
    mcc = [
        matthews_corrcoef(y_true[:, class_idx], y_pred[:, class_idx])
        for class_idx in range(y_true.shape[1])
    ]
    support = y_true.sum(axis=0)  # number of true instances per class

    return pd.DataFrame({
        'Precision'         : precision,
        'Recall'            : recall,
        'F1-Score'          : f1,
        'F2-Score'          : f2,
        'Jaccard (IoU)'     : jaccard,
        'ROC AUC'           : roc_auc,
        'Avg Precision AUC' : avg_prec,
        'MCC'               : mcc,
        'Support'           : support
    }, index=class_names)

In [27]:
# Result directory of every single-branch experiment, keyed by branch name.
# NOTE(review): the "gamma" key points at the DELTA_ONLY directory and the
# "delta" key at the GAMMA_ONLY directory — confirm this cross-mapping is
# intended and not a swapped pair; it decides which column each run lands in.
branches = {
    "alpha": "results/ALPHA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-12",
    "beta": "results/BETA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-13",
    "gamma": "results/DELTA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-14",
    "delta": "results/GAMMA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-14",
    "epsilon": "results/EPSILON_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-14",
    "zeta": "results/ZETA_ONLY_NEW_DOMAIN_KNOWLEDGE_12_LSTM_7_2_training_and_test/2025-01-16",
}

final_dict = {}
for branch_name, path in branches.items():
    # One metric table per fold; folds are loaded lazily, one at a time.
    per_fold_results = (load_info_for_single_fold(fold, path) for fold in range(5))
    dfs = [
        calculate_all_measures(
            fold_results['labels'],
            fold_results['binary_outputs'],
            classes_labels,
        )
        for fold_results in per_fold_results
    ]

    # Element-wise mean of the per-fold tables.
    df_avg = sum(dfs) / len(dfs)
    final_dict[branch_name] = df_avg
    display(df_avg)
    

Unnamed: 0,Precision,Recall,F1-Score,F2-Score,Jaccard (IoU),ROC AUC,Avg Precision AUC,MCC,Support
BBB,0.107166,0.41347,0.168511,0.25905,0.09218,0.597253,0.080591,0.108985,906.2
PR,0.204655,0.505117,0.288329,0.385822,0.169317,0.636354,0.154864,0.192291,1536.2
PVC|VPB,0.009764,0.899264,0.019314,0.046754,0.009752,0.654703,0.00942,0.050131,95.8
LAD,0.004958,0.4308,0.009799,0.023662,0.004925,0.561885,0.004237,0.015704,53.6
RAD,0.017311,0.20852,0.031662,0.063367,0.016194,0.524549,0.018762,0.010988,243.8
RBBB|CRBBB,0.330154,0.93316,0.485937,0.680242,0.322105,0.912768,0.311166,0.518,785.8
TInv,0.054589,0.610578,0.100043,0.200146,0.052668,0.582459,0.049085,0.065736,605.2
LQT,0.017062,0.602917,0.033165,0.076484,0.016863,0.527403,0.016638,0.013251,235.6
AF,0.081027,0.475622,0.137991,0.23936,0.07422,0.552105,0.073497,0.05279,975.6
AFL,0.016306,0.453395,0.031456,0.071099,0.015986,0.546212,0.014986,0.021513,199.6


Unnamed: 0,Precision,Recall,F1-Score,F2-Score,Jaccard (IoU),ROC AUC,Avg Precision AUC,MCC,Support
BBB,0.098945,0.3213,0.150489,0.220058,0.081412,0.566419,0.07282,0.079724,906.2
PR,0.235671,0.247499,0.239487,0.243714,0.136486,0.578343,0.136165,0.152589,1536.2
PVC|VPB,0.011655,0.884987,0.023006,0.055352,0.011637,0.700841,0.011041,0.064162,95.8
LAD,0.004062,0.288247,0.008004,0.019161,0.004019,0.516847,0.003717,0.004602,53.6
RAD,0.012263,0.092792,0.021621,0.039944,0.010939,0.485925,0.015974,-0.011211,243.8
RBBB|CRBBB,0.300826,0.932577,0.453626,0.654299,0.294233,0.905233,0.283922,0.489684,785.8
TInv,0.053175,0.588808,0.09745,0.19484,0.051225,0.572995,0.047792,0.058099,605.2
LQT,0.019782,0.674092,0.038387,0.088131,0.019572,0.571438,0.018498,0.036429,235.6
AF,0.077237,0.421919,0.129667,0.219992,0.069371,0.535774,0.070338,0.037061,975.6
AFL,0.017078,0.394781,0.03269,0.072449,0.016626,0.541007,0.014842,0.020995,199.6


Unnamed: 0,Precision,Recall,F1-Score,F2-Score,Jaccard (IoU),ROC AUC,Avg Precision AUC,MCC,Support
BBB,0.088334,0.371671,0.142449,0.225705,0.076694,0.563013,0.070813,0.068788,906.2
PR,0.21221,0.484876,0.293595,0.383373,0.172093,0.639206,0.155216,0.199718,1536.2
PVC|VPB,0.012216,0.773748,0.024045,0.057386,0.012172,0.683498,0.010897,0.05984,95.8
LAD,0.004532,0.600014,0.008996,0.02199,0.004519,0.563233,0.004198,0.015442,53.6
RAD,0.028201,0.809567,0.054467,0.123486,0.027999,0.672749,0.02581,0.08829,243.8
RBBB|CRBBB,0.082924,0.882603,0.151398,0.300342,0.081937,0.668891,0.079075,0.152918,785.8
TInv,0.040406,0.641881,0.076004,0.161256,0.039507,0.501062,0.040427,0.000816,605.2
LQT,0.019091,0.723692,0.037169,0.086088,0.018942,0.563697,0.018125,0.032683,235.6
AF,0.072489,0.309222,0.117187,0.186367,0.062242,0.516796,0.067243,0.018633,975.6
AFL,0.014651,0.354915,0.028123,0.062753,0.014263,0.51729,0.013802,0.008378,199.6


Unnamed: 0,Precision,Recall,F1-Score,F2-Score,Jaccard (IoU),ROC AUC,Avg Precision AUC,MCC,Support
BBB,0.080131,0.409911,0.12567,0.197369,0.06707,0.532307,0.065541,0.03958,906.2
PR,0.179487,0.387639,0.209079,0.25407,0.116797,0.550651,0.120309,0.094182,1536.2
PVC|VPB,0.011539,0.894591,0.022753,0.05459,0.011517,0.672783,0.010839,0.055932,95.8
LAD,0.004317,0.618377,0.008559,0.020863,0.0043,0.536503,0.003968,0.009486,53.6
RAD,0.031815,0.703333,0.060397,0.131224,0.031211,0.637727,0.026076,0.077679,243.8
RBBB|CRBBB,0.167066,0.889382,0.272633,0.447195,0.162145,0.760903,0.151719,0.27765,785.8
TInv,0.042999,0.673373,0.080507,0.169225,0.041944,0.517745,0.041813,0.014047,605.2
LQT,0.017386,0.744892,0.033916,0.078982,0.017257,0.531343,0.016897,0.017617,235.6
AF,0.079015,0.544573,0.135084,0.238028,0.072451,0.538986,0.07098,0.040723,975.6
AFL,0.015567,0.508478,0.030008,0.067762,0.015236,0.526367,0.014172,0.01318,199.6


Unnamed: 0,Precision,Recall,F1-Score,F2-Score,Jaccard (IoU),ROC AUC,Avg Precision AUC,MCC,Support
BBB,0.09127,0.291602,0.138135,0.201013,0.074288,0.55368,0.069846,0.064202,906.2
PR,0.181882,0.305979,0.226762,0.267821,0.12808,0.57436,0.126964,0.119376,1536.2
PVC|VPB,0.011674,0.921404,0.023053,0.055532,0.011662,0.708444,0.011254,0.066654,95.8
LAD,0.003987,0.285261,0.007857,0.018823,0.003944,0.518118,0.003724,0.004433,53.6
RAD,0.018711,0.140264,0.032883,0.060476,0.016735,0.511582,0.016817,0.008234,243.8
RBBB|CRBBB,0.297031,0.949122,0.451654,0.657896,0.292221,0.911796,0.2844,0.49154,785.8
TInv,0.051125,0.590317,0.094054,0.189616,0.049349,0.56488,0.046674,0.051266,605.2
LQT,0.021235,0.700981,0.041195,0.094501,0.02104,0.590151,0.019581,0.045358,235.6
AF,0.077304,0.467661,0.132488,0.232032,0.070962,0.540597,0.070908,0.040911,975.6
AFL,0.017024,0.480628,0.032883,0.074548,0.016723,0.553671,0.015242,0.025542,199.6


Unnamed: 0,Precision,Recall,F1-Score,F2-Score,Jaccard (IoU),ROC AUC,Avg Precision AUC,MCC,Support
BBB,0.102456,0.395289,0.161637,0.249104,0.087938,0.586041,0.076865,0.096985,906.2
PR,0.176336,0.422467,0.247999,0.329017,0.142284,0.598207,0.13553,0.13872,1536.2
PVC|VPB,0.009399,0.904347,0.0186,0.045073,0.009389,0.643936,0.009083,0.047602,95.8
LAD,0.004253,0.328825,0.008395,0.020199,0.004217,0.526578,0.00389,0.007143,53.6
RAD,0.01647,0.152924,0.029689,0.057339,0.015098,0.500972,0.016569,0.000748,243.8
RBBB|CRBBB,0.280582,0.936367,0.431056,0.636614,0.275297,0.901102,0.266091,0.470647,785.8
TInv,0.050434,0.764585,0.094478,0.198623,0.049605,0.575673,0.047963,0.062031,605.2
LQT,0.016321,0.704567,0.031897,0.074629,0.016217,0.508965,0.016316,0.002998,235.6
AF,0.088561,0.602218,0.153919,0.276994,0.083758,0.588035,0.081997,0.08822,975.6
AFL,0.01775,0.606689,0.034437,0.079036,0.017526,0.584327,0.016538,0.038448,199.6


In [26]:
def get_df_for_given_metric(final_dict, metric: str):
    """Gather one metric from every branch table into a single frame.

    Args:
        final_dict: Mapping of branch name to a DataFrame that has a column
            named ``metric``.
        metric: Name of the column to pull out of each table.

    Returns:
        pandas.DataFrame with one column per branch (named after the mapping
        key), holding that branch's ``metric`` column.
    """
    return pd.DataFrame({
        branch_name: branch_table[metric]
        for branch_name, branch_table in final_dict.items()
    })

metrics = ["Precision", "Recall", "F1-Score", "F2-Score", "ROC AUC", "Support"]
# Column spec for a table whose columns are exactly `metrics`: one specifier
# for the index column plus one per metric. Kept as a notebook-level name in
# case other cells reuse it. It used to hold only six specifiers, one short.
col_fmt = "c" * (len(metrics) + 1)
for metric in metrics:
    df = get_df_for_given_metric(final_dict, metric)
    # LaTeX needs one alignment specifier per printed column: the index
    # column plus one column per branch, so derive it from the frame.
    branch_col_fmt = "c" * (df.shape[1] + 1)
    print(df.to_latex(index=True, column_format=branch_col_fmt, caption=f"{metric} at each branch per class.",label=f"tab:{metric}_table"))


\begin{table}
\caption{Precision at each branch per class.}
\label{tab:Precision_table}
\begin{tabular}{cccccc}
\toprule
 & α & β & γ & δ & ϵ & ζ \\
\midrule
BBB & 0.107166 & 0.098945 & 0.088334 & 0.080131 & 0.091270 & 0.102456 \\
PR & 0.204655 & 0.235671 & 0.212210 & 0.179487 & 0.181882 & 0.176336 \\
PVC|VPB & 0.009764 & 0.011655 & 0.012216 & 0.011539 & 0.011674 & 0.009399 \\
LAD & 0.004958 & 0.004062 & 0.004532 & 0.004317 & 0.003987 & 0.004253 \\
RAD & 0.017311 & 0.012263 & 0.028201 & 0.031815 & 0.018711 & 0.016470 \\
RBBB|CRBBB & 0.330154 & 0.300826 & 0.082924 & 0.167066 & 0.297031 & 0.280582 \\
TInv & 0.054589 & 0.053175 & 0.040406 & 0.042999 & 0.051125 & 0.050434 \\
LQT & 0.017062 & 0.019782 & 0.019091 & 0.017386 & 0.021235 & 0.016321 \\
AF & 0.081027 & 0.077237 & 0.072489 & 0.079015 & 0.077304 & 0.088561 \\
AFL & 0.016306 & 0.017078 & 0.014651 & 0.015567 & 0.017024 & 0.017750 \\
CLBBB|LBBB & 0.009176 & 0.008848 & 0.007984 & 0.007719 & 0.008565 & 0.008370 \\
QAb & 0.035765 & 0.031

In [36]:
# Abbreviation / name table built from dx_mapping_scored.csv, with the first
# letter of each name upper-cased and the rest lower-cased by str.capitalize.
df = pd.read_csv("dx_mapping_scored.csv").assign(
    Dx=lambda mapping: mapping["Dx"].str.capitalize()
)
df_interesting = df.loc[:, ["Abbreviation", "Dx"]]
display(df_interesting)
print(df_interesting.to_latex(index=False))


Unnamed: 0,Abbreviation,Dx
0,AF,Atrial fibrillation
1,AFL,Atrial flutter
2,BBB,Bundle branch block
3,Brady,Bradycardia
4,CLBBB,Complete left bundle branch block
5,CRBBB,Complete right bundle branch block
6,IAVB,1st degree av block
7,IRBBB,Incomplete right bundle branch block
8,LAD,Left axis deviation
9,LAnFB,Left anterior fascicular block


\begin{tabular}{ll}
\toprule
Abbreviation & Dx \\
\midrule
AF & Atrial fibrillation \\
AFL & Atrial flutter \\
BBB & Bundle branch block \\
Brady & Bradycardia \\
CLBBB & Complete left bundle branch block \\
CRBBB & Complete right bundle branch block \\
IAVB & 1st degree av block \\
IRBBB & Incomplete right bundle branch block \\
LAD & Left axis deviation \\
LAnFB & Left anterior fascicular block \\
LBBB & Left bundle branch block \\
LQRSV & Low qrs voltages \\
NSIVCB & Nonspecific intraventricular conduction disorder \\
NSR & Sinus rhythm \\
PAC & Premature atrial contraction \\
PR & Pacing rhythm \\
PRWP & Poor r wave progression \\
PVC & Premature ventricular contractions \\
LPR & Prolonged pr interval \\
LQT & Prolonged qt interval \\
QAb & Qwave abnormal \\
RAD & Right axis deviation \\
RBBB & Right bundle branch block \\
SA & Sinus arrhythmia \\
SB & Sinus bradycardia \\
STach & Sinus tachycardia \\
SVPB & Supraventricular premature beats \\
TAb & T wave abnormal \\
TInv & T wave

In [43]:
# Per-class metrics of the 6-branch "Bigger-LSTM" run, averaged over its folds.
branch_6_best_lstm = "results/experiment_v4.21-Bigger-LSTM-Dropout-hidden-prune-0.05-each-epoch-L1_6_BRANCHES_3s_full_12_LSTM_11_2_training_and_test/2025-03-15"
dfs = []
for fold in range(5):
    fold_results = load_info_for_single_fold(fold, branch_6_best_lstm)
    y_true = fold_results['labels']
    y_pred = fold_results['binary_outputs']
    dfs.append(calculate_all_measures(y_true, y_pred, classes_labels))

# Element-wise mean of the per-fold tables.
df_avg = sum(dfs) / len(dfs)
df_final = df_avg[["Precision", "Recall", "F1-Score", "F2-Score", "ROC AUC", "Support"]]
display(df_final)
# The label is a fixed string: it used to interpolate `metric`, a loop variable
# leaked from another cell, which is undefined on a fresh kernel. The column
# spec is derived from the frame (index column + one per metric) instead of a
# shared variable that was one specifier short.
print(df_final.to_latex(
    index=True,
    column_format="c" * (df_final.shape[1] + 1),
    caption="Performance metrics for architecture combining 6 branches. Average of 5 folds.",
    label="tab:6branch_table",
))



Unnamed: 0,Precision,Recall,F1-Score,F2-Score,ROC AUC,Support
BBB,0.101532,0.215214,0.136205,0.173574,0.543275,942.8
PR,0.386206,0.243268,0.295677,0.261496,0.597415,1683.4
PVC|VPB,0.018211,0.741141,0.035541,0.082847,0.732057,102.6
LAD,0.004746,0.280139,0.009331,0.022194,0.529168,57.4
RAD,0.011326,0.063903,0.019178,0.032938,0.483921,251.2
RBBB|CRBBB,0.442107,0.948938,0.601828,0.770135,0.93467,921.4
TInv,0.056053,0.419457,0.098779,0.18219,0.547519,661.6
LQT,0.026475,0.386431,0.049502,0.103596,0.548306,300.8
AF,0.092837,0.317857,0.143298,0.213229,0.530722,1160.8
AFL,0.030499,0.337328,0.055902,0.111791,0.542447,345.2


\begin{table}
\caption{Performance metrics for architecture combining 6 branches. Average of 5 folds.}
\label{tab:Precision_6branch_table}
\begin{tabular}{cccccc}
\toprule
 & Precision & Recall & F1-Score & F2-Score & ROC AUC & Support \\
\midrule
BBB & 0.101532 & 0.215214 & 0.136205 & 0.173574 & 0.543275 & 942.800000 \\
PR & 0.386206 & 0.243268 & 0.295677 & 0.261496 & 0.597415 & 1683.400000 \\
PVC|VPB & 0.018211 & 0.741141 & 0.035541 & 0.082847 & 0.732057 & 102.600000 \\
LAD & 0.004746 & 0.280139 & 0.009331 & 0.022194 & 0.529168 & 57.400000 \\
RAD & 0.011326 & 0.063903 & 0.019178 & 0.032938 & 0.483921 & 251.200000 \\
RBBB|CRBBB & 0.442107 & 0.948938 & 0.601828 & 0.770135 & 0.934670 & 921.400000 \\
TInv & 0.056053 & 0.419457 & 0.098779 & 0.182190 & 0.547519 & 661.600000 \\
LQT & 0.026475 & 0.386431 & 0.049502 & 0.103596 & 0.548306 & 300.800000 \\
AF & 0.092837 & 0.317857 & 0.143298 & 0.213229 & 0.530722 & 1160.800000 \\
AFL & 0.030499 & 0.337328 & 0.055902 & 0.111791 & 0.542447 & 345.2

In [44]:
# Per-class metrics of the 6-branch run with the smaller LSTM
# (experiment v4.12), averaged over its folds.
smaller_lstm = "results/experiment_v4.12-Dropout-and-small-prune-each-epoch-L1_6_BRANCHES_3s_full_12_LSTM_7_2_training_and_test/2025-03-07"
dfs = []
for fold in range(5):
    fold_results = load_info_for_single_fold(fold, smaller_lstm)
    y_true = fold_results['labels']
    y_pred = fold_results['binary_outputs']
    dfs.append(calculate_all_measures(y_true, y_pred, classes_labels))

# Element-wise mean of the per-fold tables.
df_avg = sum(dfs) / len(dfs)
df_final = df_avg[["Precision", "Recall", "F1-Score", "F2-Score", "ROC AUC", "Support"]]
display(df_final)
# The label is a fixed string of its own: it used to interpolate `metric`, a
# loop variable leaked from another cell (undefined on a fresh kernel), and so
# ended up identical to the label of the other 6-branch table. The column spec
# is derived from the frame (index column + one per metric).
# NOTE(review): the caption is shared with the other 6-branch table; consider
# naming the LSTM variant in it.
print(df_final.to_latex(
    index=True,
    column_format="c" * (df_final.shape[1] + 1),
    caption="Performance metrics for architecture combining 6 branches. Average of 5 folds.",
    label="tab:6branch_smaller_lstm_table",
))



Unnamed: 0,Precision,Recall,F1-Score,F2-Score,ROC AUC,Support
BBB,0.099264,0.2161,0.135252,0.173862,0.542873,942.8
PR,0.327319,0.257799,0.28792,0.268972,0.595457,1683.4
PVC|VPB,0.01804,0.780714,0.035249,0.082441,0.743231,102.6
LAD,0.004253,0.274089,0.008371,0.019983,0.519178,57.4
RAD,0.008199,0.054374,0.014201,0.025397,0.47452,251.2
RBBB|CRBBB,0.448905,0.940019,0.605408,0.768317,0.93126,921.4
TInv,0.057618,0.480204,0.102768,0.194131,0.558928,661.6
LQT,0.026767,0.458,0.050555,0.108328,0.55892,300.8
AF,0.089474,0.331329,0.140577,0.214333,0.524574,1160.8
AFL,0.030482,0.38509,0.056447,0.115512,0.54896,345.2


\begin{table}
\caption{Performance metrics for architecture combining 6 branches. Average of 5 folds.}
\label{tab:Precision_6branch_table}
\begin{tabular}{cccccc}
\toprule
 & Precision & Recall & F1-Score & F2-Score & ROC AUC & Support \\
\midrule
BBB & 0.099264 & 0.216100 & 0.135252 & 0.173862 & 0.542873 & 942.800000 \\
PR & 0.327319 & 0.257799 & 0.287920 & 0.268972 & 0.595457 & 1683.400000 \\
PVC|VPB & 0.018040 & 0.780714 & 0.035249 & 0.082441 & 0.743231 & 102.600000 \\
LAD & 0.004253 & 0.274089 & 0.008371 & 0.019983 & 0.519178 & 57.400000 \\
RAD & 0.008199 & 0.054374 & 0.014201 & 0.025397 & 0.474520 & 251.200000 \\
RBBB|CRBBB & 0.448905 & 0.940019 & 0.605408 & 0.768317 & 0.931260 & 921.400000 \\
TInv & 0.057618 & 0.480204 & 0.102768 & 0.194131 & 0.558928 & 661.600000 \\
LQT & 0.026767 & 0.458000 & 0.050555 & 0.108328 & 0.558920 & 300.800000 \\
AF & 0.089474 & 0.331329 & 0.140577 & 0.214333 & 0.524574 & 1160.800000 \\
AFL & 0.030482 & 0.385090 & 0.056447 & 0.115512 & 0.548960 & 345.2

In [80]:
# Per-dataset counts of the scored classes, with paired source databases
# summed into a single column each.
df = (
    pd.read_csv("dx_mapping_scored.csv")
    .assign(**{
        "CPSC CPSC-Extra": lambda counts: counts["CPSC"] + counts["CPSC_Extra"],
        "PTB PTB-XL": lambda counts: counts["PTB"] + counts["PTB_XL"],
        "Chapman-Shaoxing Ningbo": lambda counts: counts["Chapman_Shaoxing"] + counts["Ningbo"],
    })
    .rename(columns={"Dx": "Name", "StPetersburg": "Incart", "Chapman_Shaoxing": "Chapman-Shaoxing"})
    .drop(columns=["Name", "SNOMEDCTCode", "CPSC", "CPSC_Extra", "PTB", "PTB_XL", "Notes"])
    .fillna("")
    .loc[:, ["Abbreviation", "CPSC CPSC-Extra", "Incart", "Georgia", "PTB PTB-XL", "Chapman-Shaoxing Ningbo", "Total"]]
)
display(df)
print(df.to_latex(index=False,  caption="Count of scored classes per dataset.",label="tab:Scored_classes"))

Unnamed: 0,Abbreviation,CPSC CPSC-Extra,Incart,Georgia,PTB PTB-XL,Chapman-Shaoxing Ningbo,Total
0,AF,1374,2,570,1529,1780,5255
1,AFL,54,0,186,74,8060,8374
2,BBB,0,1,116,20,385,522
3,Brady,271,11,6,0,7,295
4,CLBBB,0,0,0,0,213,213
5,CRBBB,113,0,28,542,1096,1779
6,IAVB,828,0,769,797,1140,3534
7,IRBBB,86,0,407,1118,246,1857
8,LAD,0,0,940,5146,1545,7631
9,LAnFB,0,0,180,1626,380,2186


\begin{table}
\caption{Count of scored classes per dataset.}
\label{tab:Scored_classes}
\begin{tabular}{lrrrrrr}
\toprule
Abbreviation & CPSC CPSC-Extra & Incart & Georgia & PTB PTB-XL & Chapman-Shaoxing Ningbo & Total \\
\midrule
AF & 1374 & 2 & 570 & 1529 & 1780 & 5255 \\
AFL & 54 & 0 & 186 & 74 & 8060 & 8374 \\
BBB & 0 & 1 & 116 & 20 & 385 & 522 \\
Brady & 271 & 11 & 6 & 0 & 7 & 295 \\
CLBBB & 0 & 0 & 0 & 0 & 213 & 213 \\
CRBBB & 113 & 0 & 28 & 542 & 1096 & 1779 \\
IAVB & 828 & 0 & 769 & 797 & 1140 & 3534 \\
IRBBB & 86 & 0 & 407 & 1118 & 246 & 1857 \\
LAD & 0 & 0 & 940 & 5146 & 1545 & 7631 \\
LAnFB & 0 & 0 & 180 & 1626 & 380 & 2186 \\
LBBB & 274 & 0 & 231 & 536 & 240 & 1281 \\
LQRSV & 0 & 0 & 374 & 182 & 1043 & 1599 \\
NSIVCB & 4 & 1 & 203 & 789 & 771 & 1768 \\
NSR & 922 & 0 & 1752 & 18172 & 8125 & 28971 \\
PAC & 689 & 3 & 639 & 398 & 1312 & 3041 \\
PR & 3 & 0 & 0 & 296 & 1182 & 1481 \\
PRWP & 0 & 0 & 0 & 0 & 638 & 638 \\
PVC & 188 & 0 & 0 & 0 & 1091 & 1279 \\
LPR & 0 & 0 & 0 & 340 

In [81]:
# Two tables for the unscored classes: per-dataset counts, and the
# abbreviation / name list.
df = pd.read_csv("dx_mapping_unscored.csv")
df["Dx"] = df["Dx"].str.capitalize()
df["CPSC CPSC-Extra"] = df["CPSC"] + df["CPSC_Extra"]
df["PTB PTB-XL"] = df["PTB"] + df["PTB_XL"]
df["Chapman-Shaoxing Ningbo"] = df["Chapman_Shaoxing"] + df["Ningbo"]

# Select the two columns directly in their final order. The earlier pop/insert
# on a slice of `df` gave the same layout but mutated a derived frame.
df_just_names = df[["Abbreviation", "Dx"]]

df.rename(columns={"Dx": "Name", "StPetersburg": "Incart", "Chapman_Shaoxing": "Chapman-Shaoxing"}, inplace=True)
df.drop(columns=["Name", "SNOMEDCTCode", "CPSC", "CPSC_Extra", "PTB", "PTB_XL"], inplace=True)
df.fillna("", inplace=True)
df = df[["Abbreviation", "CPSC CPSC-Extra", "Incart", "Georgia", "PTB PTB-XL", "Chapman-Shaoxing Ningbo","Total"]]
display(df)
# This label used to be "tab:Scored_classes", which is also the label of the
# scored-classes table, so LaTeX would see it defined twice.
print(df.to_latex(index=False,  caption="Count of unscored classes per dataset.",label="tab:Unscored_classes_counts"))

# This table lists abbreviations and names, not counts, so the caption now
# says so; its label is unchanged.
print(df_just_names.to_latex(index=False,  caption="Abbreviations and names of unscored classes.",label="tab:Unscored_classes"))

Unnamed: 0,Abbreviation,CPSC CPSC-Extra,Incart,Georgia,PTB PTB-XL,Chapman-Shaoxing Ningbo,Total
0,AAR,0,0,0,0,16,16
1,abQRS,0,0,0,3389,0,3389
2,AED,0,0,0,0,17,17
3,AIVR,0,0,0,0,14,14
4,AJR,0,0,19,0,12,31
...,...,...,...,...,...,...,...
98,VPVC,0,23,0,0,0,23
99,VTach,1,1,0,10,0,12
100,VTrig,4,4,1,20,8,37
101,WAP,0,0,7,0,2,9


\begin{table}
\caption{Count of unscored classes per dataset.}
\label{tab:Scored_classes}
\begin{tabular}{lrrrrrr}
\toprule
Abbreviation & CPSC CPSC-Extra & Incart & Georgia & PTB PTB-XL & Chapman-Shaoxing Ningbo & Total \\
\midrule
AAR & 0 & 0 & 0 & 0 & 16 & 16 \\
abQRS & 0 & 0 & 0 & 3389 & 0 & 3389 \\
AED & 0 & 0 & 0 & 0 & 17 & 17 \\
AIVR & 0 & 0 & 0 & 0 & 14 & 14 \\
AJR & 0 & 0 & 19 & 0 & 12 & 31 \\
ALR & 0 & 0 & 12 & 0 & 0 & 12 \\
AMI & 0 & 6 & 0 & 0 & 49 & 55 \\
AMIs & 1 & 0 & 1 & 0 & 0 & 2 \\
AnMIs & 0 & 0 & 281 & 44 & 0 & 325 \\
AnMI & 62 & 0 & 0 & 354 & 57 & 473 \\
AB & 0 & 3 & 0 & 0 & 3 & 6 \\
AFAFL & 39 & 0 & 2 & 0 & 0 & 41 \\
AH & 2 & 0 & 60 & 0 & 0 & 62 \\
AP & 0 & 0 & 52 & 0 & 0 & 52 \\
ARH & 0 & 0 & 0 & 0 & 215 & 215 \\
ATach & 15 & 0 & 28 & 0 & 297 & 340 \\
AVB & 5 & 0 & 74 & 0 & 244 & 323 \\
AVD & 0 & 0 & 0 & 0 & 59 & 59 \\
AVJR & 6 & 0 & 0 & 0 & 139 & 145 \\
AVNRT & 0 & 0 & 0 & 0 & 16 & 16 \\
AVRT & 0 & 0 & 0 & 0 & 26 & 26 \\
BPAC & 2 & 3 & 0 & 0 & 62 & 67 \\
BRU & 0 &