In [2]:
import pandas as pd
import numpy as np
import wfdb
import ast

def load_raw_data(df, sampling_rate):
    if sampling_rate == 100:
        data = [wfdb.rdsamp(f) for f in df.filename_lr]
    else:
        data = [wfdb.rdsamp(f) for f in df.filename_hr]
    data = np.array([signal for signal, meta in data])
    return data

sampling_rate=100

# load and convert annotation data
df = pd.read_csv('ptbxl_database.csv', index_col='ecg_id')
df.scp_codes = df.scp_codes.apply(lambda x: ast.literal_eval(x))

# Load raw signal data
Signals = load_raw_data(df, sampling_rate)

# Load scp_statements.csv for diagnostic aggregation
agg_df = pd.read_csv('scp_statements.csv', index_col=0)
agg_df = agg_df[agg_df.diagnostic == 1]

def aggregate_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in agg_df.index:
            tmp.append(agg_df.loc[key].diagnostic_class)
    return list(set(tmp))

# Apply diagnostic superclass
df.reset_index(inplace=True)
Labels = df.scp_codes.apply(aggregate_diagnostic)

In [14]:
len(Signals)

21799

In [35]:
np.unique(Labels, return_counts=True)

(array([list([]), list(['CD']), list(['HYP']), list(['HYP', 'CD']),
        list(['MI']), list(['MI', 'CD']), list(['MI', 'HYP']),
        list(['MI', 'HYP', 'CD']), list(['MI', 'NORM', 'HYP', 'CD']),
        list(['MI', 'STTC']), list(['MI', 'STTC', 'CD']),
        list(['MI', 'STTC', 'HYP']), list(['MI', 'STTC', 'HYP', 'CD']),
        list(['NORM']), list(['NORM', 'CD']), list(['NORM', 'HYP']),
        list(['NORM', 'HYP', 'CD']), list(['NORM', 'STTC']),
        list(['NORM', 'STTC', 'CD']), list(['STTC']), list(['STTC', 'CD']),
        list(['STTC', 'HYP']), list(['STTC', 'HYP', 'CD'])], dtype=object),
 array([ 411, 1708,  535,  300, 2532, 1297,  183,  117,    1,  599,  223,
         361,  156, 9069,  407,    2,    2,   28,    5, 2400,  471,  781,
         211]))

In [3]:
len(Labels)

21799

In [30]:
Y_encoded = label_encoder.fit_transform(Labels)


TypeError: Encoders require their input argument must be uniformly strings or numbers. Got ['list']

In [31]:
flat_labels = [item for sublist in Labels for item in sublist]

In [32]:
np.unique(flat_labels, return_counts=True)

(array(['CD', 'HYP', 'MI', 'NORM', 'STTC'], dtype='<U4'),
 array([4898, 2649, 5469, 9514, 5235]))

In [34]:
np.unique(y, return_counts=True)

(array(['CD', 'HYP', 'MI', 'NORM', 'STTC'], dtype='<U4'),
 array([1708,  535, 2532, 9069, 2400]))

In [33]:
len(flat_labels)

27765

In [15]:
y=[]
x=[]
for i in range(len(Labels)):
    if Labels[i] == list(['CD']):
        y.append('CD')
        x.append(Signals[i,:,0])
    if Labels[i] == list(['HYP']):
        y.append('HYP')
        x.append(Signals[i,:,0])
    if Labels[i] == list(['NORM']):
        y.append('NORM')
        x.append(Signals[i,:,0])
    if Labels[i] == list(['STTC']):
        y.append('STTC')
        x.append(Signals[i,:,0])
    if Labels[i] == list(['MI']):
        y.append('MI')
        x.append(Signals[i,:,0])

x = np.array(x)
y = np.array(y)

In [9]:
x[1].shape

(1000,)

In [16]:
augmented_signals = []
augmented_labels = []


for label in ['CD', 'HYP', 'MI', 'STTC']:
    target_indices = np.where(y == label)[0]
    augmentation_factor = (np.count_nonzero(y == 'NORM')  // np.count_nonzero(y == label)) -1
    
    for index in target_indices:
        repeated_signal = np.tile(x[index], (augmentation_factor, 1, 1))
        noisy_signal = repeated_signal + 0.01 * np.random.randn(*repeated_signal.shape)
        augmented_signals.append(noisy_signal)
        augmented_labels.extend([y[index]] * augmentation_factor)
    
X = np.concatenate([x] + augmented_signals)
Y = np.concatenate([y, np.array(augmented_labels)])

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 3 dimension(s)

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

label_encoder = LabelEncoder()
Y_encoded = label_encoder.fit_transform(Y)

Y_onehot = to_categorical(Y_encoded, num_classes=5)

X_train, X_test, y_train, y_test = train_test_split(X, Y_onehot, test_size=0.2, random_state=None)

In [65]:
X = X_train
Y = y_train

In [4]:
# CNN model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

model = Sequential([
    Conv1D(128, kernel_size=3, activation='relu', input_shape=(1000, 12)),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),
    
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),

    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    Dense(5, activation='softmax')
])

2024-01-18 20:03:26.325748: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-01-18 20:03:26.325785: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-01-18 20:03:26.325793: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-01-18 20:03:26.326084: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-18 20:03:26.326298: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, AveragePooling1D

cnn = Sequential([
    Conv1D(256, kernel_size=4, activation='relu', input_shape=(1000,12)),
    MaxPooling1D(pool_size=3),
    Dropout(0.25),
    
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=3),
    Dropout(0.25),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dense(5, activation='softmax')
])

In [9]:
X_train.shape

(78539, 1000, 12)

In [8]:
from tensorflow.keras.optimizers.legacy import Adam

optimizer = Adam(learning_rate=0.001)
cnn.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

generator = TimeseriesGenerator(X_train, y_train, length=200, batch_size=8)

model.fit(generator, epochs=3)

Epoch 1/3


ValueError: in user code:

    File "/Users/harshbalgude/miniconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/Users/harshbalgude/miniconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/harshbalgude/miniconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/Users/harshbalgude/miniconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/Users/harshbalgude/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/harshbalgude/miniconda3/lib/python3.11/site-packages/keras/src/engine/input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_1' (type Sequential).
    
    Input 0 of layer "max_pooling1d_3" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, None, None, 128)
    
    Call arguments received by layer 'sequential_1' (type Sequential):
      • inputs=tf.Tensor(shape=(None, None, None, None), dtype=float32)
      • training=True
      • mask=None


In [9]:
cnn.fit(
    X_train, y_train,
    validation_split = 0.2,
    epochs=5,
    batch_size = 8,
)

Epoch 1/5


2024-01-18 20:08:28.136246: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x4d229d250>

In [13]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

generator = TimeseriesGenerator(X_train, y_train, length=1000, batch_size=32)

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 998, 128)          4736      
                                                                 
 max_pooling1d (MaxPooling1  (None, 499, 128)          0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 499, 128)          0         
                                                                 
 conv1d_1 (Conv1D)           (None, 497, 128)          49280     
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 248, 128)          0         
 g1D)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 248, 128)          0

In [38]:
len(generator)

9793

In [42]:
len(X_train)

32772

In [80]:
from tensorflow.keras import layers, models

def build_1d_resnet18(input_shape, num_classes):
    input_tensor = layers.Input(shape=input_shape)

    # Initial Convolution
    x = layers.Conv1D(64, kernel_size=7, strides=2, padding='same', activation='relu')(input_tensor)
    x = layers.MaxPooling1D(pool_size=3, strides=2, padding='same')(x)

    # Residual Blocks
    x = residual_block_1d(x, 64, 1)
    x = residual_block_1d(x, 128, 2)
    x = residual_block_1d(x, 256, 2)
    x = residual_block_1d(x, 512, 2)

    # Global Average Pooling
    x = layers.GlobalAveragePooling1D()(x)

    # Fully Connected layer
    x = layers.Dense(num_classes, activation='softmax')(x)

    # Create model
    model = models.Model(inputs=input_tensor, outputs=x, name='resnet18_1d')

    return model

def residual_block_1d(input_tensor, filters, strides):
    shortcut = input_tensor

    # First convolution layer
    x = layers.Conv1D(filters, kernel_size=3, strides=strides, padding='same', activation='relu')(input_tensor)

    # Second convolution layer
    x = layers.Conv1D(filters, kernel_size=3, padding='same', activation='relu')(x)

    # Shortcut connection
    if strides != 1 or input_tensor.shape[-1] != filters:
        shortcut = layers.Conv1D(filters, kernel_size=1, strides=strides, padding='valid', activation='relu')(input_tensor)

    # Add shortcut to main path
    x = layers.add([x, shortcut])

    return x

In [79]:

# Example usage
input_shape_1d = (1000, 12)  # Adjust input size based on your data
num_classes = 5  # Adjust based on your classification task

resnet18_1d_model = build_1d_resnet18(input_shape_1d, num_classes)

# Display model summary
resnet18_1d_model.summary()

Model: "resnet18_1d"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 1000, 12)]           0         []                            
                                                                                                  
 conv1d_29 (Conv1D)          (None, 500, 64)              5440      ['input_3[0][0]']             
                                                                                                  
 max_pooling1d_7 (MaxPoolin  (None, 250, 64)              0         ['conv1d_29[0][0]']           
 g1D)                                                                                             
                                                                                                  
 conv1d_30 (Conv1D)          (None, 250, 64)              12352     ['max_pooling1d_7[0]

In [77]:
from tensorflow.keras.optimizers.legacy import Adam

optimizer = Adam(learning_rate=0.001)
resnet18_1d_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [54]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [58]:
resnet18_1d_model.fit(
    X_train, y_train,
    validation_split = 0.2,
    epochs=100,
    batch_size = 8,
    callbacks = early_stopping
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


<keras.src.callbacks.History at 0x39f05a250>

In [49]:
predictions = resnet18_1d_model.predict(X_test)



In [60]:
resnet18_1d_model.evaluate(X_test, y_test)



[0.3375917971134186, 0.9210301637649536]

In [None]:
get_model():
    

In [62]:
from sklearn.model_selection import StratifiedKFold
k_folds = 5
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

In [None]:
# Initialize lists to store the results
losses = []
accuracies = []

# Iterate over the folds
for train_index, test_index in skf.split(X, Y[:,0]):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = Y[train_index], Y[test_index]
    
    resnet18_1d_model.compile(
        optimizer=optimizer, 
        loss='categorical_crossentropy', 
        metrics=['accuracy'])
    
    # Train the model
    history = resnet18_1d_model.fit(
        X_train, y_train, 
        epochs=10, 
        batch_size=32, 
        validation_data=(X_val, y_val), 
        verbose=0)

    # Evaluate the model on the validation set
    loss, accuracy = resnet18_1d_model.evaluate(X_val, y_val, verbose=0)

    # Store the results
    losses.append(loss)
    accuracies.append(accuracy)

# Print the average results over all folds
print(f'Average Loss: {np.mean(losses)}')
print(f'Average Accuracy: {np.mean(accuracies)}')

In [67]:
Y.shape

(32772, 5)

In [73]:
len(X[16244])

1000

In [1]:
len(y)

NameError: name 'y' is not defined