In [16]:
from tsai.all import *
from fastai.metrics import FBeta, RocAuc
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from sklearn.model_selection import train_test_split
from helper_code import *
import numpy as np
import pandas as pd
my_setup()

os              : macOS-15.1-arm64-arm-64bit
python          : 3.11.7
tsai            : 0.4.0
fastai          : 2.7.19
fastcore        : 1.7.29
torch           : 2.5.1
device          : mps
cpu cores       : 14
threads per cpu : 1
RAM             : 24.0 GB
GPU memory      : N/A


In [2]:
def extract_features(record):
    header = load_header(record)
    age = get_age(header)
    sex = get_sex(header)

    one_hot_encoding_sex = np.zeros(3, dtype=bool)
    if sex == 'Female':
        one_hot_encoding_sex[0] = 1
    elif sex == 'Male':
        one_hot_encoding_sex[1] = 1
    else:
        one_hot_encoding_sex[2] = 1

    signal, fields = load_signals(record)

    target_length = 4096
    if len(signal) > target_length:
        padded_signal = signal[:target_length]
    else:
        total_padding = target_length - len(signal)
        padding = total_padding // 2
        padded_signal = np.pad(signal, ((padding, total_padding - padding), (0, 0)), 'constant', constant_values=(0, 0))

    return padded_signal, age, sex

In [3]:
data_folder = '/Users/victorli/Documents/GitHub/Physionet-2025/smallest_training_set/'
records = find_records(data_folder)
num_records = len(records)
ecg_signals = []
age = []
sex = []
labels = []

for record in records:
    record = os.path.join(data_folder, record)
    record_signals, record_age, record_sex = extract_features(record)
    record_label = load_label(record)
    ecg_signals.append(record_signals.T)
    age.append(record_age)
    sex.append(record_sex)
    labels.append(record_label)

ecg_signals = np.array(ecg_signals)
labels = np.array(labels)

In [None]:
batch_size = 32
num_workers = 4
num_eopchs = 1
lr = 1e-3
archs = [
    (LSTM, {}),                # LSTM (Hochreiter, 1997)
    (GRU, {}),                 # GRU (Cho, 2014)
    (MLP, {}),                 # MLP - Multilayer Perceptron (Wang, 2016)
    (FCN, {}),                 # FCN - Fully Convolutional Network (Wang, 2016)
    (ResNet, {}),              # ResNet - Residual Network (Wang, 2016)
    (LSTM_FCN, {}),            # LSTM-FCN (Karim, 2017)
    (GRU_FCN, {}),             # GRU-FCN (Elsayed, 2018)
    (mWDN, {'levels': 4}),     # mWDN - Multilevel wavelet decomposition network (Wang, 2018)
    (TCN, {}),                 # TCN - Temporal Convolutional Network (Bai, 2018)
    (MLSTM_FCN, {}),           # MLSTM-FCN - Multivariate LSTM-FCN (Karim, 2019)
    (InceptionTime, {}),       # InceptionTime (Fawaz, 2019)
    (MiniRocket, {}),              # Rocket (Dempster, 2019)
    (XceptionTime, {}),        # XceptionTime (Rahimian, 2019)
    (ResCNN, {}),              # ResCNN - 1D-ResCNN (Zou, 2019)
    (TabModel, {}),            # TabModel - modified from fastai’s TabularModel
    (OmniScaleCNN, {}),        # OmniScaleCNN - Omni-Scale 1D-CNN (Tang, 2020)
    (TST, {}),                 # TST - Time Series Transformer (Zerveas, 2020)
    (TabTransformer, {}),      # TabTransformer (Huang, 2020)
    (TSiT, {}),                # TSiT - Adapted from ViT (Dosovitskiy, 2020)
    (MiniRocket, {}),          # MiniRocket (Dempster, 2021)
    (XCM, {}),                 # XCM - An Explainable Convolutional Neural Network (Fauvel, 2021)
    (gMLP, {}),                # gMLP - Gated Multilayer Perceptron (Liu, 2021)
    (TSPerceiver, {}),         # TSPerceiver - Adapted from Perceiver IO (Jaegle, 2021)
    (GatedTabTransformer, {}), # GatedTabTransformer (Cholakov, 2022)
    (TSSequencerPlus, {}),     # TSSequencerPlus - Adapted from Sequencer (Tatsunami, 2022)
    (PatchTST, {})             # PatchTST (Nie, 2022)
]

for i, (arch, k) in enumerate(archs):
    print(f"********************** {arch.__name__} **********************")
    for split in range(1):
        print(f"------------------- Fold {split} -------------------")
        X_train, X_temp, y_train, y_temp = train_test_split(ecg_signals, labels, test_size=0.2, random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
        X, y, splits = combine_split_data([X_train, X_val], [y_train, y_val])


        tfms  = [None, [Categorize()]]
        dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)
        dls  = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[batch_size, batch_size*2], num_workers=num_workers)
        model = create_model(arch, dls=dls, **k)
        learn = Learner(dls, model,  metrics=[accuracy, FBeta(beta=1)]) #FBeta is basically F1
        start = time.time()
        learn.fit_one_cycle(num_eopchs, lr)
        elapsed = time.time() - start


        valid_dl = dls.valid
        test_ds = valid_dl.dataset.add_test(X_test, y_test)
        test_dl = valid_dl.new(test_ds)
        test_probas, test_targets, test_preds = learn.get_preds(dl=test_dl, with_decoded=True)

        print(f"Test Accuracy: {accuracy_score(test_targets, test_preds)}")
        print(f"Test AUROC: {roc_auc_score(test_targets, test_preds)}")
        print(f"Test F1: {f1_score(test_targets, test_preds)}")
        print(f"Test Challenge Score: {compute_challenge_score(test_targets, test_probas[:, 1])}")
        print(f"Time taken: {elapsed} seconds")


********************** LSTM **********************
------------------- Fold 0 -------------------


epoch,train_loss,valid_loss,accuracy,fbeta_score,time
0,0.683805,0.666334,0.882353,0.0,00:00


Test Accuracy: 0.8333333333333334
Test AUROC: 0.46875
Test F1: 0.0
Test Challenge Score: 0.0
Time taken: 0.8010611534118652 seconds
********************** GRU **********************
------------------- Fold 0 -------------------


epoch,train_loss,valid_loss,accuracy,fbeta_score,time


KeyboardInterrupt: 