In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell execution, so edits to the local modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Standard libraries
import math
import os
import sys

# Environment variables: configured before TensorFlow/Keras are imported so the
# settings are already in place when the frameworks first initialise the GPU.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
# NOTE(review): the GPU index '3' is specific to the machine this was run on.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

# Scikit-learn
from sklearn.calibration import calibration_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keras
import keras
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, Conv1D, Dropout, Activation, Flatten
from keras.metrics import AUC
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.utils import Sequence

# TensorFlow
# NOTE: the two imports below re-bind Input, Dropout, Dense, Flatten and Model,
# so the tensorflow.keras versions are the ones in effect after this cell runs.
import tensorflow as tf
from tensorflow.keras.layers import Input, Dropout, Dense, Flatten, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model

# Local imports
from models import Attia_et_al_CNN

from utils import split_train_val_test

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
2024-04-23 14:51:37.117223: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-23 14:51:37.117250: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-23 14:51:37.118207: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory 

In [3]:
# Load the pickled dataset from data/arrythmia_dataset.pickle.
# NOTE(review): unpickling executes arbitrary code from the file -- only load
# pickles from a trusted source.
data_df = pd.read_pickle('data/arrythmia_dataset.pickle')

In [None]:
# Label columns used as prediction targets. The order matters: it fixes the
# column order of the target matrix and therefore of the model outputs.
conditions = [
    '1AVB', 'AF', 'AFIB', 'APB', 'AQW', 'IDC', 'LVH', 'LVQRSAL', 'RBBB',
    'SR', 'ST', 'STDD', 'STE', 'STTC', 'SVT', 'TWC', 'TWO',
]

# The network is built with one output per entry in `conditions`.
output_size = len(conditions)

# Input shape (5000, 6): presumably 5000 samples per record and 6 selected
# leads -- TODO confirm against the waveform array.
model = Attia_et_al_CNN(output_size=output_size).build(input_shape=(5000, 6))

In [None]:
MODEL_TYPE = 'limb'  # One of: 'optimal', 'limb', 'precordial', 'worst'

# Lead subsets per model variant; the values are indices into the lead axis of
# the waveform array.
LEAD_SETS = {
    'optimal': [0, 1, 3, 9, 10, 11],
    'limb': [0, 1, 2, 3, 4, 5],
    'precordial': [6, 7, 8, 9, 10, 11],
    'worst': [2, 4, 5, 6, 7, 8],
}

# Fail immediately on a typo instead of leaving `sel_leads` undefined (or, in a
# live kernel, silently keeping the value from a previous run).
if MODEL_TYPE not in LEAD_SETS:
    raise ValueError(
        f"Unknown MODEL_TYPE {MODEL_TYPE!r}; expected one of {sorted(LEAD_SETS)}"
    )

# Copy so later code cannot mutate the lookup table through `sel_leads`.
sel_leads = list(LEAD_SETS[MODEL_TYPE])

In [None]:
# Build the target matrix: one column per entry in `conditions`, cast to float.
y = data_df[conditions].to_numpy().astype(float)

# Build the input array: stack the per-record 'wf' entries along a new leading
# axis, then keep only the selected leads on the last axis.
X = np.stack(data_df['wf'].to_numpy(), axis=0)
X = X[:, :, sel_leads]

# Drop the reference to the source frame so its memory can be reclaimed.
# Re-running this cell afterwards requires re-running the loading cell first.
del data_df

# Quick sanity summary of the assembled arrays.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
print(f"y mean: {y.mean()}")

In [None]:
# Partition the data into train / validation / test subsets with the project
# helper. train_size=0.7 and val_size=0.15 are passed explicitly; the remainder
# presumably forms the test set -- confirm in utils.split_train_val_test.
(X_train, X_val, X_test,
 y_train, y_val, y_test) = split_train_val_test(
    X,
    y,
    train_size=0.7,
    val_size=0.15,
)

# The unsplit arrays are no longer referenced below; drop the names.
del X, y

In [None]:
def generator(X, y, batch_size=8):
    """Yield shuffled mini-batches of ``(X, y)``.

    The row order is drawn afresh from NumPy's global random state on every
    call, so each new iteration visits the rows in a different order.

    Args:
        X: Array indexed by sample along its first axis.
        y: 2-D array whose rows correspond to the rows of ``X``.
        batch_size: Maximum number of rows per batch; the last batch may be
            smaller.

    Yields:
        Tuples ``(X_batch, y_batch)`` built from the same row indices.
    """
    order = np.arange(X.shape[0])
    np.random.shuffle(order)

    batch_starts = range(0, len(order), batch_size)
    for start in batch_starts:
        picked = order[start:start + batch_size]
        yield X[picked], y[picked, :]

# Element spec for one generated batch: (inputs, targets), both float32 with a
# variable-length leading batch dimension.
output_signature = tuple(
    tf.TensorSpec(shape=batch_shape, dtype=tf.float32)
    for batch_shape in ((None, 5000, 6), (None, output_size))
)

# Each dataset wraps a zero-argument callable that creates a fresh generator,
# so the batch order is reshuffled whenever the dataset is iterated again.
train_ds = tf.data.Dataset.from_generator(
    generator=lambda: generator(X_train, y_train, 8),
    output_signature=output_signature,
)
val_ds = tf.data.Dataset.from_generator(
    generator=lambda: generator(X_val, y_val, 8),
    output_signature=output_signature,
)

In [None]:
# Training parameters
EPOCHS = 50  # Maximum number of epochs; adjust as needed
learning_rate = 1e-3

# Stop once val_loss has not improved for 6 epochs and roll the model back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=6,
    mode='min',
    restore_best_weights=True,
)

# Halve the learning rate after 3 epochs without val_loss improvement, never
# going below 1e-8; verbose=1 prints a message when it triggers.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    verbose=1,
    min_lr=1e-8,
)

# Binary cross-entropy over all outputs; accuracy and AUC are tracked as metrics.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
    metrics=['accuracy', AUC(name='auc')],
)

In [None]:
# Train the model on the batched dataset.
# `shuffle` is intentionally not passed: Keras ignores it when the input is a
# tf.data.Dataset. Shuffling is done by generator(), which draws a new row
# order each time the dataset is iterated.
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=[reduce_lr, early_stopping],
    verbose=1,
)

In [None]:
from sklearn.metrics import roc_auc_score

# Per-condition ROC AUC on the test split.
y_pred = model.predict(X_test)

for i, condition in enumerate(conditions):
    # ROC AUC is undefined when a label column contains a single class.
    # Report that for the affected condition instead of aborting the loop.
    if np.unique(y_test[:, i]).size < 2:
        print(f"{condition}: n/a (only one class present in y_test)")
        continue
    auc = roc_auc_score(y_test[:, i], y_pred[:, i])
    print(f"{condition}: {auc}")
    

In [None]:
# Inspect the predicted scores for the first test sample (one value per condition).
y_pred[0,:]

In [None]:
# AUROC over all conditions for each data split.
# NOTE(review): with 2-D targets roc_auc_score applies its default averaging
# across label columns -- confirm this is the intended summary metric.

# Test AUROC
pred = model.predict(X_test)
auc = roc_auc_score(y_test, pred)
print(f"Test AUROC: {auc:.3f}")

# Train AUROC
pred_train = model.predict(X_train, verbose=0)
auc_train = roc_auc_score(y_train, pred_train)
print(f"Train AUROC: {auc_train:.3f}")

# Validation AUROC (previously printed auc_train by mistake)
pred_val = model.predict(X_val, verbose=0)
auc_val = roc_auc_score(y_val, pred_val)
print(f"Val AUROC: {auc_val:.3f}")

In [None]:
# Save the trained model under a name that encodes the lead configuration.
model_path = f'models/12-lead/multi_output_cnn_{MODEL_TYPE}.keras'

# Create the directory that is actually written to, including the '12-lead'
# subfolder; creating only 'models' leaves the save target missing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

In [None]:
# Load the model for the selected lead configuration. The file name includes
# MODEL_TYPE so it matches the path written by the save cell; the original
# f-string had no placeholder and pointed at a different file.
model = keras.models.load_model(f'models/12-lead/multi_output_cnn_{MODEL_TYPE}.keras')