# Replicate CinC2020 Methodology with Embeddings for comparison

In [1]:
import json
import pickle
from glob import glob

import torch
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
import zarr

from utils import ElapsedTimer
from utils.evaluation_helper import evaluate_score_batch
from utils.evaluate_12ECG_score import load_table

In [2]:
features = pd.read_csv("cinc-2020-redux/features.csv", index_col="header_file")
features.sort_values(by=["header_file"], inplace=True)
features

Unnamed: 0_level_0,III_ECG_Rate_Mean,III_HRV_CVNN,III_HRV_CVSD,III_HRV_IQRNN,III_HRV_MedianNN,III_HRV_S,III_HRV_SD1d,III_HRV_SD2d,III_HRV_TINN,III_hb__absolute_sum_of_changes,...,aVR_sig__median,aVR_sig__minimum,aVR_sig__number_peaks__n_50,aVR_sig__quantile__q_0.1,aVR_sig__quantile__q_0.6,aVR_sig__quantile__q_0.7,aVR_sig__skewness,aVR_sig__spkt_welch_density__coeff_8,aVR_sig__symmetry_looking__r_0.05,age
header_file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data/Training_2/Q0001.hea,102.660956,0.009256,0.016208,5.00,584.0,83.664371,4.723102,3.028644,18.0,1.751985,...,0.002177,-0.172858,12.0,-0.067011,0.008588,0.016795,0.846964,0.049597,1.0,53.0
data/Training_2/Q0002.hea,69.756300,0.008707,0.011467,7.50,862.0,166.686710,5.451081,5.747546,24.0,2.346360,...,0.016472,-0.757538,14.0,-0.051787,0.029876,0.055044,-3.845372,0.106367,1.0,70.0
data/Training_2/Q0003.hea,92.308669,0.008724,0.010497,8.00,650.0,91.294488,2.804758,4.690416,18.0,1.791802,...,0.012728,-0.197668,15.0,-0.034050,0.015892,0.018845,-2.779057,0.016094,0.0,55.0
data/Training_2/Q0004.hea,84.429546,0.025635,0.033100,28.00,708.0,1084.484258,10.237187,16.411370,62.0,1.155505,...,0.005770,-0.389243,11.0,-0.045368,0.013476,0.021139,-2.144045,0.114138,1.0,57.0
data/Training_2/Q0005.hea,74.163064,0.042314,0.052298,50.00,812.0,3633.177770,20.534671,26.115291,170.0,1.564914,...,0.023119,-0.704043,14.0,-0.049241,0.027798,0.034594,-4.140107,0.161882,1.0,51.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
data/WFDB/S0545.hea,106.141317,0.190199,0.219179,48.00,565.0,34706.126763,64.806419,80.438133,907.0,4.620132,...,0.016602,-0.602554,14.0,-0.306025,0.089696,0.128420,-0.133615,0.143935,1.0,66.0
data/WFDB/S0546.hea,112.159460,0.123915,0.127917,26.75,560.0,12306.159210,40.776887,46.536997,395.0,2.801590,...,0.022408,-0.434874,13.0,-0.117383,0.031480,0.044744,-1.411642,0.098990,1.0,66.0
data/WFDB/S0547.hea,78.141272,0.165247,0.112111,38.00,810.0,32598.983554,44.714592,130.346568,613.0,1.981420,...,0.008747,-0.383345,16.0,-0.099498,0.012891,0.022533,-0.363843,0.050389,1.0,75.0
data/WFDB/S0548.hea,71.901590,0.019749,0.020145,14.00,836.0,750.213729,8.773947,15.638121,110.0,1.791737,...,0.021393,-0.625016,17.0,-0.035306,0.024319,0.030525,-4.392511,0.106708,1.0,75.0


In [3]:
dxs = {}
with open("cinc-2020-redux/dxs.txt") as f:
    for line in f.readlines():
        k, v = json.loads(line)
        dxs[k] = v

In [4]:
root = zarr.open_group("data/ecgs.zarr", mode="r")
print(root.info)
print(root.tree())

# check dxs match!
for idx, file_path in enumerate(features.iloc[:100].index.to_list()):
    assert list(root["raw/dx"][idx]) == dxs[file_path], idx


Name        : /
Type        : zarr.hierarchy.Group
Read-only   : True
Store type  : zarr.storage.DirectoryStore
No. members : 5
No. arrays  : 0
No. groups  : 5
Groups      : beats, cleaned, meta, raw, seq_embeddings

/
 ├── beats
 │   ├── r_peak_idxs (43099,) object
 │   ├── valid_r_peak_idxs (43099,) object
 │   ├── window_size_400 (43099,) object
 │   ├── window_size_400_normalized (43099,) object
 │   ├── window_size_400_normalized_flattened (801266, 400, 12) float32
 │   ├── window_size_400_outlier (43099,) int32
 │   └── window_size_400_shape (43099, 3) int32
 ├── cleaned
 │   └── p_signal (43099,) object
 ├── meta
 │   └── record_idx_to_window_400_range (1,) object
 ├── raw
 │   ├── dx (43099,) object
 │   ├── meta (43099, 3) int32
 │   ├── p_signal (43099,) object
 │   └── p_signal_shape (43099, 2) int32
 └── seq_embeddings
     ├── version_0 (43099, 768) float64
     ├── version_1 (43099, 768) float64
     ├── version_10 (43099, 768) float64
     ├── version_11 (43099, 768) flo

In [5]:
with open("data/snomed_ct_dx_map.json", "r") as f:
    SNOMED_CODE_MAP = json.load(f)
# SNOMED_CODE_MAP

In [6]:
weights_file = "evaluation-2020/weights.csv"
rows, cols, all_weights = load_table(weights_file)
assert rows == cols
scored_codes = rows
# scored_codes

## Load same training/validation/test splits

In [7]:
torch_checkpoints = glob("log_beat_autoencoder/*/checkpoints/*.ckpt")
data_configs = {}
for torch_checkpoint in torch_checkpoints:
    state_dict = torch.load(torch_checkpoint)
    raw_data_config = state_dict["hyper_parameters"]["data_config"]
    
    data_config = {}
    for k, v in raw_data_config.items():
        data_config[k] = v.indices
    version_str = torch_checkpoint.split("/")[1]
    data_config["checkpoint"] = torch_checkpoint
    data_configs[version_str] = data_config


print(len(data_configs))
data_configs.keys()


20


dict_keys(['version_11', 'version_0', 'version_6', 'version_12', 'version_19', 'version_8', 'version_17', 'version_9', 'version_15', 'version_13', 'version_4', 'version_3', 'version_10', 'version_16', 'version_18', 'version_1', 'version_2', 'version_7', 'version_5', 'version_14'])

In [8]:
data_configs["version_0"].keys()

dict_keys(['train_records', 'val_records', 'test_records', 'checkpoint'])

### Train Classifiers per label

In [9]:
def _determine_sample_weights(
    data_set, scored_codes, label_weights, weight_threshold=0.5
):
    """Using the scoring labels weights to increase the dataset size of positive labels
    """
    data_labels = []
    sample_weights = []
    for dt in data_set:
        sample_weight = None
        for dx in dt:
            if str(dx) in scored_codes:
                _sample_weight = label_weights[scored_codes.index(str(dx))]
                if _sample_weight < weight_threshold:
                    continue
                if sample_weight is None or _sample_weight > sample_weight:
                    sample_weight = _sample_weight

        if sample_weight is None:
            # not a scored label, treat as a negative example (weight of 1)
            sample_weight = 1.0
            data_labels.append(False)
        else:
            data_labels.append(True)
        sample_weights.append(sample_weight)
    return data_labels, sample_weights


def _train_label_classifier(
    sc,
    idx_sc,
    all_weights,
    train_features,
    train_labels,
    eval_features,
    eval_labels,
    scored_codes,
    early_stopping_rounds,
    num_gpus,
):
    label_weights = all_weights[idx_sc]
    train_labels, train_weights = _determine_sample_weights(
        train_labels, scored_codes, label_weights
    )
    eval_labels, eval_weights = _determine_sample_weights(
        eval_labels, scored_codes, label_weights
    )

    # try negative over positive https://machinelearningmastery.com/xgboost-for-imbalanced-classification/
    pos_count = len([e for e in train_labels if e])
    pos_count = max(pos_count, 1)
    scale_pos_weight = (len(train_labels) - pos_count) / pos_count

    model = XGBClassifier(
        booster="dart",  # gbtree, dart or gblinear
        verbosity=0,
        tree_method = "gpu_hist",
        sampling_method = "gradient_based",
        scale_pos_weight=scale_pos_weight,
    )

    eval_set = [
        (train_features, train_labels), (eval_features, eval_labels)
    ]
    sample_weight_eval_set = [
        train_weights, eval_weights
    ]

    model = model.fit(
        train_features,
        train_labels,
        sample_weight=train_weights,
        eval_set=eval_set,
        sample_weight_eval_set=sample_weight_eval_set,
        early_stopping_rounds=early_stopping_rounds,
        verbose=False,
    )

    return sc, model

def train_experiment(
    data_config,
    all_weights=all_weights,
    scored_codes=scored_codes,
    features=features,
    root=root,
    early_stopping_rounds=20,
    
):
    train_idxs = data_config["train_records"]
    val_idxs = data_config["val_records"]
    test_idxs = data_config["test_records"]
    
    checkpoint = data_config["checkpoint"]
    version_str = checkpoint.split("/")[1]

    embeddings = root[f"seq_embeddings/{version_str}"]
    raw_features = features.to_numpy()
    embd_features = np.concatenate((raw_features, embeddings), axis=1)
        
    train_features, train_labels = np.take(embd_features, train_idxs, axis=0), np.take(root["raw/dx"], train_idxs)
    eval_features, eval_labels = np.take(embd_features, val_idxs, axis=0), np.take(root["raw/dx"], val_idxs)
    test_features, test_labels = np.take(embd_features, test_idxs, axis=0), np.take(root["raw/dx"], test_idxs)
    
    classes = []
    labels = []
    scores = []
    
    models = {}
    
    for idx_sc, sc in enumerate(scored_codes):
        with ElapsedTimer() as t:
            print(f"Training {SNOMED_CODE_MAP[sc][1]} classifier...", end="")
            sc, model = _train_label_classifier(
                sc,
                idx_sc,
                all_weights,
                train_features,
                train_labels,
                eval_features,
                eval_labels,
                scored_codes,
                early_stopping_rounds,
                1,
            )
            classes.append(str(sc))
            labels.append(model.predict(test_features).tolist())
            scores.append(model.predict_proba(test_features)[:, 1].tolist())
            models[sc] = model
        print(f"Took {t.duration:.2f} seconds")

    (
        classes,
        auroc,
        auprc,
        auroc_classes,
        auprc_classes,
        accuracy,
        f_measure,
        f_measure_classes,
        f_beta_measure,
        g_beta_measure,
        challenge_metric,
    ) = evaluate_score_batch(
        predicted_classes=classes,
        predicted_labels=np.array(labels).T,
        predicted_probabilities=np.array(scores).T,
        raw_ground_truth_labels=test_labels,
    )
        
    log = {
        "test_auroc": auroc,
        "test_auprc": auprc,
        "test_accuracy": accuracy,
        "test_f_measure": f_measure,
        "test_f_beta_measure": f_beta_measure,
        "test_g_beta_measure": g_beta_measure,
        "test_challenge_metric": challenge_metric,
    }
    class_output_string = "Classes,{}\nAUROC,{}\nAUPRC,{}\nF-measure,{}".format(
        ",".join(classes),
        ",".join("{:.3f}".format(x) for x in auroc_classes),
        ",".join("{:.3f}".format(x) for x in auprc_classes),
        ",".join("{:.3f}".format(x) for x in f_measure_classes),
    )

    print(log)
    return log, class_output_string, models


In [10]:
for version_key, data_config in data_configs.items():
    print(f"Experiment {version_key}")
    with ElapsedTimer() as t:
        log, class_output_string, models = train_experiment(data_config)

        with open(f"cinc-2020-redux-embedding/{version_key}_test_results.json", "w") as f:
            json.dump(log, f)
        with open(f"cinc-2020-redux-embedding/{version_key}_test_class_labelwise.csv", "w") as f:
            f.write(class_output_string)
        with open(f"cinc-2020-redux-embedding/{version_key}_models.pkl", "wb") as f:
            pickle.dump(models, f)
            
    print(f"Experiment {version_key} Took {t.duration:.2f}s")

Experiment version_11
Training 1st degree av block classifier...Took 8.76 seconds
Training atrial fibrillation classifier...Took 8.50 seconds
Training atrial flutter classifier...Took 8.33 seconds
Training bradycardia classifier...Took 4.16 seconds
Training complete right bundle branch block classifier...Took 8.32 seconds
Training incomplete right bundle branch block classifier...Took 8.43 seconds
Training left anterior fascicular block classifier...Took 8.28 seconds
Training left axis deviation classifier...Took 8.52 seconds
Training left bundle branch block classifier...Took 7.50 seconds
Training low qrs voltages classifier...Took 7.40 seconds
Training nonspecific intraventricular conduction disorder classifier...Took 8.28 seconds
Training pacing rhythm classifier...Took 7.56 seconds
Training premature atrial contraction classifier...Took 8.15 seconds
Training premature ventricular contractions classifier...Took 8.25 seconds
Training Prolonged PR interval classifier...Took 6.03 secon

NameError: name 'dc_idx' is not defined