In [1]:
import pandas as pd
import numpy as np
import wfdb
import ast

def load_raw_data(df, sampling_rate):
    if sampling_rate == 100:
        data = [wfdb.rdsamp(f) for f in df.filename_lr]
    else:
        data = [wfdb.rdsamp(f) for f in df.filename_hr]
    data = np.array([signal for signal, meta in data])
    return data

sampling_rate=100

# load and convert annotation data
df = pd.read_csv('ptbxl_database.csv')
df.scp_codes = df.scp_codes.apply(lambda x: ast.literal_eval(x))

# Load raw signal data
Signals = load_raw_data(df, sampling_rate)

# Load scp_statements.csv for diagnostic aggregation
agg_df = pd.read_csv('scp_statements.csv', index_col=0)
agg_df = agg_df[agg_df.diagnostic == 1]

def aggregate_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in agg_df.index:
            tmp.append(agg_df.loc[key].diagnostic_class)
    return list(set(tmp))

# Apply diagnostic superclass
df.reset_index(inplace = True)
Labels = df.scp_codes.apply(aggregate_diagnostic)

In [15]:
def augment_signals(signals, labels, augmentation_factor = 5, noise_factor=0.01):
    augmented_signals = []
    augmented_labels = []
    
    for index in range(len(labels)):
        repeated_signal = np.tile(signals[index], (augmentation_factor, 1, 1))
        noisy_signal = repeated_signal + noise_factor * np.random.randn(*repeated_signal.shape)
        augmented_signals.append(noisy_signal)
        augmented_labels.extend([labels[index]] * augmentation_factor)
        
    X = np.concatenate([signals] + augmented_signals)
    Y = np.concatenate([labels, augmented_labels])
    
    return X,Y

In [16]:
X,Y = augment_signals(Signals, Labels)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (108995,) + inhomogeneous part.

In [24]:
x = []
y = []

valid_labels = [list(['CD']), list(['HYP']), list(['NORM']), list(['STTC']), list(['MI'])]
for i in range(len(Labels)):
    if Labels[i] in valid_labels:
        y.append(Labels[i])
        x.append(Signals[i])
    
x = np.array(x)
y = np.array(y)

augmented_signals = []
augmented_labels = []

for label in ['CD', 'HYP', 'MI', 'STTC']:
    target_indices = np.where(y == label)[0]
    augmentation_factor = (np.count_nonzero(y == 'NORM')  // np.count_nonzero(y == label)) -1
        
    for index in target_indices:
        repeated_signal = np.tile(x[index], (augmentation_factor, 1, 1))
        noisy_signal = repeated_signal + 0.01 * np.random.randn(*repeated_signal.shape)
        augmented_signals.append(noisy_signal)
        augmented_labels.extend([y[index]] * augmentation_factor)
        
X = np.concatenate([x] + augmented_signals)
Y = np.concatenate([y, np.array(augmented_labels)])

In [25]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

label_encoder = LabelEncoder()
Y_encoded = label_encoder.fit_transform(Y)

Y_onehot = to_categorical(Y_encoded, num_classes=5)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y_onehot, test_size=0.2, random_state=None)

  y = column_or_1d(y, warn=True)


In [2]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

mlb = MultiLabelBinarizer()
one_hot_encoded_labels = mlb.fit_transform(Labels)

X_train, X_test, y_train, y_test = train_test_split(Signals, one_hot_encoded_labels, test_size = 0.2)

In [42]:
from tensorflow.keras import layers, models

def build_1d_resnet18(input_shape, num_classes):
    input_tensor = layers.Input(shape=input_shape)

    # Initial Convolution
    x = layers.Conv1D(64, kernel_size=7, strides=2, padding='same', activation='relu')(input_tensor)
    x = layers.MaxPooling1D(pool_size=3, strides=2, padding='same')(x)

    # Residual Blocks
    x = residual_block_1d(x, 64, 1)
    x = residual_block_1d(x, 128, 2)
    x = residual_block_1d(x, 256, 2)
    x = residual_block_1d(x, 512, 2)

    # Global Average Pooling
    x = layers.GlobalAveragePooling1D()(x)

    # Fully Connected layer
    x = layers.Dense(num_classes, activation='softmax')(x)

    # Create model
    model = models.Model(inputs=input_tensor, outputs=x, name='resnet18_1d')

    return model

def residual_block_1d(input_tensor, filters, strides):
    shortcut = input_tensor

    # First convolution layer
    x = layers.Conv1D(filters, kernel_size=3, strides=strides, padding='same', activation='relu')(input_tensor)

    # Second convolution layer
    x = layers.Conv1D(filters, kernel_size=3, padding='same', activation='relu')(x)

    # Shortcut connection if needed
    if strides != 1 or input_tensor.shape[-1] != filters:
        shortcut = layers.Conv1D(filters, kernel_size=1, strides=strides, padding='valid', activation='relu')(input_tensor)

    # Add shortcut to main path
    x = layers.add([x, shortcut])

    return x

In [20]:
from tensorflow.keras import layers, models

def build_1d_vgg16(input_shape, num_classes):
    input_tensor = layers.Input(shape=input_shape)

    # Block 1
    x = layers.Conv1D(64, kernel_size=3, padding='same', activation='relu')(input_tensor)
    x = layers.Conv1D(64, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2, strides=2, data_format='channels_last')(x)

    # Block 2
    x = layers.Conv1D(128, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(128, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2, strides=2, data_format='channels_last')(x)

    # Block 3
    x = layers.Conv1D(256, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(256, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(256, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2, strides=2, data_format='channels_last')(x)

    # Block 4
    x = layers.Conv1D(512, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(512, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(512, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2, strides=2, data_format='channels_last')(x)

    # Block 5
    x = layers.Conv1D(512, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(512, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv1D(512, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2, strides=2, data_format='channels_last')(x)

    # Flatten and fully connected layers
    x = layers.Flatten()(x)
    x = layers.Dense(4096, activation='relu')(x)
    x = layers.Dense(4096, activation='relu')(x)

    # Output layer
    output_tensor = layers.Dense(num_classes, activation='sigmoid')(x)  # Changed to 'sigmoid' for multilabel classification

    # Create model
    model = models.Model(inputs=input_tensor, outputs=output_tensor, name='vgg16_1d')

    return model

# Assuming you have input_shape and num_classes defined
input_shape = (1000, 12)  # Adjust input_size according to your data
num_classes = 5  # Adjust based on your number of classes

# Build the VGG16 1D model
vgg16_1d_model = build_1d_vgg16(input_shape, num_classes)

# Compile the model
vgg16_1d_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
vgg16_1d_model.summary()


Model: "vgg16_1d"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1000, 12)]        0         
                                                                 
 conv1d_28 (Conv1D)          (None, 1000, 64)          2368      
                                                                 
 conv1d_29 (Conv1D)          (None, 1000, 64)          12352     
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 500, 64)           0         
 g1D)                                                            
                                                                 
 conv1d_30 (Conv1D)          (None, 500, 128)          24704     
                                                                 
 conv1d_31 (Conv1D)          (None, 500, 128)          49280     
                                                          

In [43]:
input_shape = (1000, 12)
num_classes = 5  

resnet18_1d_model = build_1d_resnet18(input_shape, num_classes)

resnet18_1d_model.summary()

Model: "resnet18_1d"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_8 (InputLayer)        [(None, 1000, 12)]           0         []                            
                                                                                                  
 conv1d_65 (Conv1D)          (None, 500, 64)              5440      ['input_8[0][0]']             
                                                                                                  
 max_pooling1d_11 (MaxPooli  (None, 250, 64)              0         ['conv1d_65[0][0]']           
 ng1D)                                                                                            
                                                                                                  
 conv1d_66 (Conv1D)          (None, 250, 64)              12352     ['max_pooling1d_11[0

In [40]:
from tensorflow.keras.optimizers.legacy import Adam

optimizer = Adam(learning_rate=0.001)

resnet18_1d_model.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [12]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [22]:
resnet18_1d_model.fit(
    X_train, y_train,
    validation_split = 0.2,
    epochs=100,
    batch_size = 8,
    callbacks = early_stopping
)

Epoch 1/100
Epoch 2/100

KeyboardInterrupt: 

In [44]:
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import KFold

def train_model(X, Y, input_shape = (1000,12), num_classes = 5, k_folds=5, epochs=100, batch_size=8):
    # Initialize KFold
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    # Initialize lists to store model instances and training histories
    models = []
    histories = []

    for fold, (train_indices, val_indices) in enumerate(kfold.split(X)):
        print(f"Training on fold {fold + 1}...")

        X_train, X_val = X[train_indices], X[val_indices]
        Y_train, Y_val = Y[train_indices], Y[val_indices]

        resnet18_1d_model = build_1d_resnet18(input_shape, num_classes)

        optimizer = Adam(learning_rate=0.001)

        resnet18_1d_model.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        history = resnet18_1d_model.fit(
            X_train, Y_train,
            validation_data=(X_val, Y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stopping]
        )

        # Save model and history for later analysis
        models.append(resnet18_1d_model)
        histories.append(history)

    return models, histories

In [45]:
trained_model, histories = train_model(X_train, Y_train, input_shape=(1000, 12), num_classes=5)

Training on fold 1...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Training on fold 2...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

KeyboardInterrupt: 

In [None]:
results = trained_model.evaluate(X_test, Y_test)
print("Test Loss:", results[0])
print("Test Accuracy:", results[1])