In [1]:
import pandas as pd
import numpy as np
import wfdb
import ast

def load_raw_data(df, sampling_rate):
    if sampling_rate == 100:
        data = [wfdb.rdsamp(f) for f in df.filename_lr]
    else:
        data = [wfdb.rdsamp(f) for f in df.filename_hr]
    data = np.array([signal for signal, meta in data])
    return data

sampling_rate=100

# load and convert annotation data
df = pd.read_csv('ptbxl_database.csv', index_col='ecg_id')
df.scp_codes = df.scp_codes.apply(lambda x: ast.literal_eval(x))

# Load raw signal data
Signals = load_raw_data(df, sampling_rate)

# Load scp_statements.csv for diagnostic aggregation
agg_df = pd.read_csv('scp_statements.csv', index_col=0)
agg_df = agg_df[agg_df.diagnostic == 1]

def aggregate_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in agg_df.index:
            tmp.append(agg_df.loc[key].diagnostic_class)
    return list(set(tmp))

# Apply diagnostic superclass
df.reset_index(inplace=True)
Labels = df.scp_codes.apply(aggregate_diagnostic)

In [73]:
import numpy as np
import pandas as pd

Labels.dropna(inplace = True)
# Drop NaN values and get unique labels
unique_labels = np.unique(np.concatenate(Labels))

# Create a dictionary to map labels to one-hot vectors
label_to_onehot = {}
for i, label in enumerate(unique_labels):
    onehot = [1 if j == i else 0 for j in range(len(unique_labels))]
    label_to_onehot[label] = onehot

# Create the one-hot encoded version
Y = []

for element in Labels:
    if element:
        onehot_sum = np.sum([label_to_onehot[label] for label in element], axis=0)
        Y.append(onehot_sum)
    else:
        Y.append(np.zeros(len(unique_labels)))

Y = np.stack(Y)


In [90]:
Y[14255]

array([1., 1., 1., 1., 0.])

In [76]:
np.unique(Labels)

array([list([]), list(['CD']), list(['CD', 'HYP']),
       list(['CD', 'HYP', 'MI', 'NORM']), list(['CD', 'HYP', 'NORM']),
       list(['CD', 'MI']), list(['CD', 'MI', 'HYP']),
       list(['CD', 'NORM']), list(['CD', 'STTC']),
       list(['CD', 'STTC', 'HYP']), list(['CD', 'STTC', 'MI']),
       list(['CD', 'STTC', 'MI', 'HYP']), list(['CD', 'STTC', 'NORM']),
       list(['HYP']), list(['HYP', 'NORM']), list(['MI']),
       list(['MI', 'CD']), list(['MI', 'HYP']), list(['MI', 'HYP', 'CD']),
       list(['MI', 'STTC']), list(['MI', 'STTC', 'CD']),
       list(['MI', 'STTC', 'HYP']), list(['MI', 'STTC', 'HYP', 'CD']),
       list(['NORM']), list(['STTC']), list(['STTC', 'HYP']),
       list(['STTC', 'NORM'])], dtype=object)

In [88]:
indices=[]
for i in range(len(Labels)):
    if Labels[i] == list(['CD', 'HYP', 'MI', 'NORM']):
        indices.append(i)

In [89]:
indices

[14255]

In [53]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(Signals, Y, test_size = 0.2)

In [57]:
#custom metric
def my_metric(ytrue,ypred):
    temp = np.argsort(ypred)[-1:][0]
    for i in range(len(ypred)):
        ypred[i] = 0
    ypred[temp] = 1
    true_1 = list(np.where(ytrue)[0])
    pred_1 = list(np.where(ypred)[0])
    false_1 = list(np.where([i-1 for i in ytrue])[0])
    count = 0
    for i in pred_1:
        if i in true_1:
            count+=1
    val = count/len(true_1)
    for i in false_1:
        if ypred[i]==1:
            val = 0
            break
    return val

In [58]:
#train
def train(model, dataloader, optimizer, criterion, train_data, device,scheduler):
    print('Training')
    model.train()
    counter = 0
    train_running_loss = 0.0
    accuracy = 0
    for i, data in tqdm(enumerate(dataloader), total=int(len(train_data)/dataloader.batch_size)):
        counter += 1
        data, target = data['image'].to(device), data['label'].to(device)
        optimizer.zero_grad()
        outputs = model(data)
        outputs = torch.sigmoid(outputs)
        loss = criterion(outputs, target)
        train_running_loss += loss.item()
        # backpropagation
        loss.backward()
        # update optimizer parameters
        optimizer.step()
        accuracy += my_metric(list(target.detach().cpu()[0].numpy()),list(outputs.detach().cpu()[0].numpy()))
    scheduler.step()
    train_loss = train_running_loss / counter
    accuracy = accuracy/counter
    return train_loss,accuracy

array([0., 1.])

In [49]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, AveragePooling1D

cnn = Sequential([
    Conv1D(256, kernel_size=4, activation='relu', input_shape=(1000,12)),
    MaxPooling1D(pool_size=3),
    Dropout(0.25),
    
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=3),
    Dropout(0.25),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dense(5, activation='softmax')
])

2024-01-18 23:37:02.359835: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-01-18 23:37:02.359894: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-01-18 23:37:02.359917: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-01-18 23:37:02.360045: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-18 23:37:02.360148: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [54]:
from tensorflow.keras.optimizers.legacy import Adam

optimizer = Adam(learning_rate=0.001)
cnn.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [55]:
cnn.fit(
    X_train, y_train,
    validation_split = 0.2,
    epochs=5,
    batch_size = 8,
)

Epoch 1/5


2024-01-18 23:41:02.448008: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x28e2b4bd0>