In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Standard libraries

import math
import os
import sys

os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'


# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

# Scikit-learn
from sklearn.calibration import calibration_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keras
import keras
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, Conv1D, Dropout, Activation, Flatten
from keras.metrics import AUC
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.utils import Sequence

# TensorFlow
import tensorflow as tf
from tensorflow.keras.layers import Input, Dropout, Dense, Flatten, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model

# Local imports
from models import Attia_et_al_CNN

# Environment variables
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from utils import split_train_val_test, load_X_y

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
2024-04-15 12:38:29.659049: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-15 12:38:29.659083: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-15 12:38:29.660158: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory 

In [3]:
LEAD_NAME = 'V3'

In [4]:
# load diagnostic_data.pickle
data_df = pd.read_pickle('data/arrythmia_dataset.pickle')

In [5]:
conditions = ['1AVB', 'AF', 'AFIB', 'APB', 'AQW', 'IDC',
              'LVH', 'LVQRSAL', 'RBBB', 'SR', 'ST',
              'STDD', 'STE', 'STTC', 'SVT', 'TWC',
              'TWO']

output_size = len(conditions)
model = Attia_et_al_CNN(output_size=output_size).build(input_shape=(5000, 1))

Attia et al. CNN model initialized with the following parameters:
  filter_numbers: [16, 16, 32, 32, 64, 64]
  kernel_widths: [5, 5, 5, 3, 3, 3]
  pool_sizes: [2, 2, 4, 2, 2, 4]
  spatial_num_filters: 64
  dense_dropout_rate: 0.2
  spatial_dropout_rate: 0.2
  dense_units: [64, 32]
  use_spatial_layer: False


2024-04-15 12:38:55.399600: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-15 12:38:55.430088: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-15 12:38:55.430423: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5000, 1)]         0         
                                                                 
 conv1d (Conv1D)             (None, 5000, 16)          96        
                                                                 
 batch_normalization (Batch  (None, 5000, 16)          64        
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 5000, 16)          0         
                                                                 
 max_pooling1d (MaxPooling1  (None, 2500, 16)          0         
 D)                                                              
                                                                 
 spatial_dropout1d (Spatial  (None, 2500, 16)          0     

In [6]:
# Load the data
X, y = data_df['wf'].to_numpy(), data_df[conditions].to_numpy()
y = y.astype(float)
X = np.stack(X, axis=0)

lead_labels = ['I', 'II', 'III', 'aVR', 'aVF', 'aVL', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
lead_idx = lead_labels.index(LEAD_NAME)
X = X[:, :, lead_idx].reshape(-1, 5000, 1)

del data_df

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: (45053, 5000, 1)
y shape: (45053, 17)


In [7]:
# Split into train and test
X_train, X_val, X_test, y_train, y_val, y_test = split_train_val_test(X, y, train_size=0.7, val_size=0.15)
del X, y

In [8]:
def generator(X, y, batch_size=8):
    row_nums = np.arange(X.shape[0])
    np.random.shuffle(row_nums)
    for i in range(0, len(row_nums), batch_size):
        current_idxs = row_nums[i:i+batch_size]

        yield X[current_idxs], y[current_idxs,:]

output_signature = (
    tf.TensorSpec(shape=(None, 5000, 1), dtype=tf.float32),
    tf.TensorSpec(shape=(None, output_size), dtype=tf.float32)
)

train_ds = tf.data.Dataset.from_generator(generator=lambda: generator(X_train,y_train, 8), output_signature=output_signature)
val_ds = tf.data.Dataset.from_generator(generator=lambda: generator(X_val, y_val, 8), output_signature=output_signature)

In [9]:
learning_rate =1e-3
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',  # Monitor validation loss
    factor=0.5,  # Reduce learning rate by half when triggered
    patience=3,  # Number of epochs with no improvement to trigger the callback
    verbose=1,  # Print messages
    min_lr=1e-8  # Minimum learning rate
)
early_stopping = EarlyStopping(monitor='val_loss', patience=6, mode='min', restore_best_weights=True)
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate), metrics=['accuracy', AUC(name='auc')])
# Training parameters
EPOCHS = 50  # You can adjust based on your needs



In [10]:
history = model.fit(train_ds,
    epochs=EPOCHS,
    shuffle=True,
    validation_data=val_ds,
    callbacks=[reduce_lr, early_stopping],
    verbose=1)

Epoch 1/50


2024-04-15 12:39:26.303665: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904


   3943/Unknown - 61s 13ms/step - loss: 0.2112 - accuracy: 0.2170 - auc: 0.7626

2024-04-15 12:40:24.067845: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 12692995419261124534


Epoch 2/50
   3/3943 [..............................] - ETA: 2:05 - loss: 0.1853 - accuracy: 0.1667 - auc: 0.8044

2024-04-15 12:40:33.347401: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 3263483216392039045
2024-04-15 12:40:33.347424: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 14408449931047340520


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 32: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 42: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 48: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 49/50
Epoch 50/50


In [12]:
from sklearn.metrics import roc_auc_score

y_pred = model.predict(X_test)

for i, condition in enumerate(conditions):
    auc = roc_auc_score(y_test[:,i], y_pred[:,i])
    print(f"{condition}: {auc}")

1AVB: 0.9600359608745684
AF: 0.9650265722301078
AFIB: 0.9210275016343574
APB: 0.8143991749705347
AQW: 0.7236144063156688
IDC: 0.8771143901231042
LVH: 0.8839283688265127
LVQRSAL: 0.6606830298296902
RBBB: 0.9078223039695275
SR: 0.9795669894637579
ST: 0.9936498210395269
STDD: 0.8408127534916272
STE: 0.7015855216023426
STTC: 0.7613098608239957
SVT: 0.9912106043049496
TWC: 0.7743667597734711
TWO: 0.8530909778386152


In [11]:
save_dir = f'models/single-lead'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
model.save(f'{save_dir}/mult_output_cnn_{LEAD_NAME}.keras')