In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Standard libraries

import math
import os
import sys

os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'


# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

# Scikit-learn
from sklearn.calibration import calibration_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keras
import keras
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, Conv1D, Dropout, Activation, Flatten
from keras.metrics import AUC
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.utils import Sequence

# TensorFlow
import tensorflow as tf
from tensorflow.keras.layers import Input, Dropout, Dense, Flatten, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model

# Local imports
from models import Attia_et_al_CNN

from utils import split_train_val_test

# Environment variables
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
2024-04-10 14:06:36.878518: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-10 14:06:36.878548: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-10 14:06:36.879661: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory 

In [3]:
# load diagnostic_data.pickle
data_df = pd.read_pickle('data/arrythmia_dataset.pickle')

In [4]:
conditions = ['1AVB', 'AF', 'AFIB', 'APB', 'AQW', 'IDC',
              'LVH', 'LVQRSAL', 'RBBB', 'SR', 'ST',
              'STDD', 'STE', 'STTC', 'SVT', 'TWC',
              'TWO']

output_size = len(conditions)
model = Attia_et_al_CNN(output_size=output_size).build()

Attia et al. CNN model initialized with the following parameters:
  filter_numbers: [16, 16, 32, 32, 64, 64]
  kernel_widths: [5, 5, 5, 3, 3, 3]
  pool_sizes: [2, 2, 4, 2, 2, 4]
  spatial_num_filters: 64
  dense_dropout_rate: 0.2
  spatial_dropout_rate: 0.2
  dense_units: [64, 32]
  use_spatial_layer: False


2024-04-10 14:10:24.420790: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-10 14:10:24.463468: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-10 14:10:24.463812: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5000, 12)]        0         
                                                                 
 conv1d (Conv1D)             (None, 5000, 16)          976       
                                                                 
 batch_normalization (Batch  (None, 5000, 16)          64        
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 5000, 16)          0         
                                                                 
 max_pooling1d (MaxPooling1  (None, 2500, 16)          0         
 D)                                                              
                                                                 
 spatial_dropout1d (Spatial  (None, 2500, 16)          0     

In [5]:
# Load the data
X, y = data_df['wf'].to_numpy(), data_df[conditions].to_numpy()
y = y.astype(float)
X = np.stack(X, axis=0)

del data_df

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
print(f"y mean: {y.mean()}")

X shape: (45053, 5000, 12)
y shape: (45053, 17)
y mean: 0.06011351336530439


In [6]:
# Split into train and test
X_train, X_val, X_test, y_train, y_val, y_test = split_train_val_test(X, y, train_size=0.7, val_size=0.15)
del X, y

In [7]:
def generator(X, y, batch_size=8):
    row_nums = np.arange(X.shape[0])
    np.random.shuffle(row_nums)
    for i in range(0, len(row_nums), batch_size):
        current_idxs = row_nums[i:i+batch_size]

        yield X[current_idxs], y[current_idxs,:]

output_signature = (
    tf.TensorSpec(shape=(None, 5000, 12), dtype=tf.float32),
    tf.TensorSpec(shape=(None, output_size), dtype=tf.float32)
)

train_ds = tf.data.Dataset.from_generator(generator=lambda: generator(X_train,y_train, 8), output_signature=output_signature)
val_ds = tf.data.Dataset.from_generator(generator=lambda: generator(X_val,y_val, 8), output_signature=output_signature)

In [8]:
learning_rate =1e-3
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',  # Monitor validation loss
    factor=0.5,  # Reduce learning rate by half when triggered
    patience=3,  # Number of epochs with no improvement to trigger the callback
    verbose=1,  # Print messages
    min_lr=1e-8  # Minimum learning rate
)
early_stopping = EarlyStopping(monitor='val_loss', patience=6, mode='min', restore_best_weights=True)
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate), metrics=['accuracy', AUC(name='auc')])
# Training parameters
EPOCHS = 50  # You can adjust based on your needs



In [9]:
history = model.fit(train_ds,
    epochs=EPOCHS,
    shuffle=True,
    validation_data=val_ds,
    callbacks=[reduce_lr, early_stopping],
    verbose=1)

Epoch 1/50


2024-04-10 14:10:50.873758: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904


   3943/Unknown - 49s 11ms/step - loss: 0.2079 - accuracy: 0.2450 - auc: 0.7703

2024-04-10 14:11:37.135969: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 15851254894432586033


Epoch 2/50
  11/3943 [..............................] - ETA: 43s - loss: 0.2023 - accuracy: 0.3409 - auc: 0.8324

2024-04-10 14:11:40.535947: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 5474817431934542826
2024-04-10 14:11:40.536005: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 14692816815526548696
2024-04-10 14:11:40.536021: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 4008971303760801404


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 31: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 40: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 44: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 50: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.


In [10]:
from sklearn.metrics import roc_auc_score

y_pred = model.predict(X_test)

for i, condition in enumerate(conditions):
    auc = roc_auc_score(y_test[:,i], y_pred[:,i])
    print(f"{condition}: {auc}")
    

1AVB: 0.9638099347909473
AF: 0.9681221354801401
AFIB: 0.9240833028964911
APB: 0.8140585378049217
AQW: 0.7729185460195956
IDC: 0.8682842597013195
LVH: 0.9632228638854536
LVQRSAL: 0.7586582659080875
RBBB: 0.8670345295399106
SR: 0.9854833493522901
ST: 0.9967942732648615
STDD: 0.9418983971992128
STE: 0.6623381511592894
STTC: 0.8628587479865345
SVT: 0.9931811101752603
TWC: 0.9027098891581641
TWO: 0.9339386767157304


In [11]:
# Calculate AUROC
pred = model.predict(X_test)
auc = roc_auc_score(y_test, pred)
print(f"Test AUROC: {auc:.3f}")

# Train AUROC
pred_train = model.predict(X_train, verbose=0)
auc_train = roc_auc_score(y_train, pred_train)
print(f"Train AUROC: {auc_train:.3f}")

# Train AUROC
pred_val = model.predict(X_val, verbose=0)
auc_val = roc_auc_score(y_val, pred_val)
print(f"Val AUROC: {auc_train:.3f}")

Test AUROC: 0.893
Train AUROC: 0.907
Train AUROC: 0.907


In [12]:
os.makedirs('models', exist_ok=True)
model.save(f'models/12-lead/multi_output_cnn.keras')

In [None]:
# Load the model
model = keras.models.load_model(f'models/12-lead/multi_output_cnn.keras')