In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# Constant seed. `set_random_seed` seeds Python's `random`, NumPy and TensorFlow
# together; seeding TensorFlow alone leaves the other two generators unpinned.
tf.keras.utils.set_random_seed(6950)


# Load the Diagnostics.xlsx data
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"
diagnostics_df = pd.read_excel(diagnostics_file)

# Rename "SA" to "SI" in the "Rhythm" column
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].replace("SA", "SI")

# Encode "Gender" column: 0 for "MALE" and 1 for "FEMALE"
# (any other value becomes NaN and is removed by the dropna() below)
diagnostics_df["Gender"] = diagnostics_df["Gender"].map({"MALE": 0, "FEMALE": 1})

# Merge specified labels into four target classes: AFIB, GSVT, SB, SR
merge_mapping = {
    "AF": "AFIB",
    "AFIB": "AFIB",
    "SVT": "GSVT",
    "AT": "GSVT",
    "SAAWR": "GSVT",
    "ST": "GSVT",
    "AVNRT": "GSVT",
    "AVRT": "GSVT",
    "SB": "SB",
    "SR": "SR",
    "SI": "SR"
}
# Rhythm values that are not keys of merge_mapping become NaN here.
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].map(merge_mapping)

# Drop rows with any missing values. This runs AFTER both .map() calls so that
# rows whose Gender/Rhythm could not be mapped are removed too, instead of
# leaking NaN into the feature matrix or the label vector.
diagnostics_df = diagnostics_df.dropna()

# Separate features and labels: every column except the identifier and the
# two annotation columns is used as a model input.
features = diagnostics_df.drop(columns=["FileName", "Rhythm", "Beat"]).to_numpy()
labels = diagnostics_df["Rhythm"].to_numpy()  # Using "Rhythm" as the target variable

# Convert features to float32
features = features.astype("float32")

# Encode labels as one-hot with merged classes.
# `label_indices[i]` is the position of labels[i] in the sorted `unique_labels`,
# i.e. exactly label_map[labels[i]].
unique_labels, label_indices = np.unique(labels, return_inverse=True)
label_map = {label: index for index, label in enumerate(unique_labels)}
labels_encoded = to_categorical(label_indices, num_classes=len(unique_labels))

# Train-test split (80/20, fixed random_state so the split itself is repeatable)
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

2024-11-28 12:24:32.723678: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-28 12:24:32.733995: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-28 12:24:32.737147: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-28 12:24:32.746167: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Sanity check: show (features, one-hot labels) shapes for the train split, then the test split.
for split_features, split_labels in ((X_train, y_train), (X_test, y_test)):
    print(split_features.shape, split_labels.shape)

(8516, 13) (8516, 4)
(2130, 13) (2130, 4)


In [3]:
# MLP variant: hidden layers of 128-64-32 units, batch normalisation after each.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation="relu"),
    BatchNormalization(),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1732775075.020792  771696 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732775075.052621  771696 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732775075.061801  771696 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732775075.06612

[1m 84/107[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 1ms/step - accuracy: 0.6475 - loss: 0.9153

I0000 00:00:1732775077.227850  771837 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6755 - loss: 0.8494 - val_accuracy: 0.2811 - val_loss: 2.4595
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8392 - loss: 0.4441 - val_accuracy: 0.7271 - val_loss: 0.6182
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8448 - loss: 0.4072 - val_accuracy: 0.7964 - val_loss: 0.4872
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 920us/step - accuracy: 0.8576 - loss: 0.3826 - val_accuracy: 0.8087 - val_loss: 0.5409
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8651 - loss: 0.3630 - val_accuracy: 0.8380 - val_loss: 0.4286
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8727 - loss: 0.3416 - val_accuracy: 0.8351 - val_loss: 0.4519
Epoch 7/500
[1m107/107[0m [3

In [4]:
# MLP variant: hidden layers of 64-32-16 units, batch normalisation after each.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dense(16, activation="relu"),
    BatchNormalization(),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6296 - loss: 0.9657 - val_accuracy: 0.6021 - val_loss: 1.1702
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 841us/step - accuracy: 0.8271 - loss: 0.4633 - val_accuracy: 0.6878 - val_loss: 0.7792
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 906us/step - accuracy: 0.8429 - loss: 0.4172 - val_accuracy: 0.8016 - val_loss: 0.4995
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 836us/step - accuracy: 0.8463 - loss: 0.3932 - val_accuracy: 0.8492 - val_loss: 0.4312
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 836us/step - accuracy: 0.8495 - loss: 0.3802 - val_accuracy: 0.8533 - val_loss: 0.3986
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 927us/step - accuracy: 0.8547 - loss: 0.3699 - val_accuracy: 0.8668 - val_loss: 0.3586
Epoch 7/500
[1m107/107

In [5]:
# MLP variant: hidden layers of 32-16-8 units, batch normalisation after each.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dense(16, activation="relu"),
    BatchNormalization(),
    Dense(8, activation="relu"),
    BatchNormalization(),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.3211 - loss: 1.6692 - val_accuracy: 0.2165 - val_loss: 3.0370
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6968 - loss: 0.7442 - val_accuracy: 0.7042 - val_loss: 0.7740
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7800 - loss: 0.5765 - val_accuracy: 0.7975 - val_loss: 0.5837
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 915us/step - accuracy: 0.8090 - loss: 0.5097 - val_accuracy: 0.7811 - val_loss: 0.5298
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 980us/step - accuracy: 0.8208 - loss: 0.4770 - val_accuracy: 0.8046 - val_loss: 0.4796
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 875us/step - accuracy: 0.8238 - loss: 0.4631 - val_accuracy: 0.8058 - val_loss: 0.4695
Epoch 7/500
[1m107/107[0m

In [6]:
# MLP variant: hidden layers of 64-32 units, batch normalisation only after the first.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dense(32, activation="relu"),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.6204 - loss: 0.9602 - val_accuracy: 0.2570 - val_loss: 2.1612
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 803us/step - accuracy: 0.8173 - loss: 0.4984 - val_accuracy: 0.5299 - val_loss: 0.9612
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 832us/step - accuracy: 0.8365 - loss: 0.4315 - val_accuracy: 0.7840 - val_loss: 0.5641
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 803us/step - accuracy: 0.8418 - loss: 0.4146 - val_accuracy: 0.8022 - val_loss: 0.5347
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 775us/step - accuracy: 0.8450 - loss: 0.4006 - val_accuracy: 0.8404 - val_loss: 0.4604
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 833us/step - accuracy: 0.8498 - loss: 0.3926 - val_accuracy: 0.8445 - val_loss: 0.4545
Epoch 7/500
[1m107/107

In [7]:
# MLP variant: hidden layers of 32-16 units, batch normalisation only after the first.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dense(16, activation="relu"),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.4923 - loss: 1.1072 - val_accuracy: 0.3339 - val_loss: 1.6976
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 848us/step - accuracy: 0.7388 - loss: 0.6715 - val_accuracy: 0.5370 - val_loss: 0.9233
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 872us/step - accuracy: 0.8089 - loss: 0.5273 - val_accuracy: 0.6813 - val_loss: 0.6934
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 761us/step - accuracy: 0.8204 - loss: 0.4722 - val_accuracy: 0.7987 - val_loss: 0.5201
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 742us/step - accuracy: 0.8323 - loss: 0.4486 - val_accuracy: 0.8322 - val_loss: 0.4337
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 714us/step - accuracy: 0.8341 - loss: 0.4350 - val_accuracy: 0.8257 - val_loss: 0.4235
Epoch 7/500

In [8]:
# MLP variant: hidden layers of 32-16 units, batch normalisation only after the first.
# NOTE(review): this is the same architecture and training setup as the preceding
# cell — presumably a repeat run; confirm whether both cells are needed.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dense(16, activation="relu"),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.5126 - loss: 1.1809 - val_accuracy: 0.2318 - val_loss: 2.5590
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 992us/step - accuracy: 0.7547 - loss: 0.6439 - val_accuracy: 0.6373 - val_loss: 0.7885
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 844us/step - accuracy: 0.8080 - loss: 0.5176 - val_accuracy: 0.6614 - val_loss: 0.7664
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 809us/step - accuracy: 0.8205 - loss: 0.4696 - val_accuracy: 0.7729 - val_loss: 0.5980
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 764us/step - accuracy: 0.8261 - loss: 0.4480 - val_accuracy: 0.8040 - val_loss: 0.5146
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 784us/step - accuracy: 0.8316 - loss: 0.4367 - val_accuracy: 0.8363 - val_loss: 0.4260
Epoch 7/500
[1m107/107

In [9]:
# MLP variant: a single hidden layer of 16 units followed by batch normalisation.
n_features = X_train.shape[1]
n_classes = labels_encoded.shape[1]

mlp = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Dense layer;
    # Keras emits a UserWarning for `input_shape` in Sequential models.
    tf.keras.Input(shape=(n_features,)),
    Dense(16, activation="relu"),
    BatchNormalization(),
    Dense(n_classes, activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# No callbacks are passed, so all 500 epochs run. validation_split=0.2 holds out
# 20% of the training data for the per-epoch val_loss / val_accuracy figures.
mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2)

# Evaluate the model: turn softmax outputs / one-hot targets into class indices
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report (names ordered by class index)
label_names = sorted(label_map, key=label_map.get)
print("\nClassification Report:\n")
# `labels=` pins the report to every known class, so `target_names` still lines
# up even if a class is missing from both the test targets and the predictions.
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=list(range(len(label_names))),
    target_names=label_names,
    digits=5,
))


Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.4338 - loss: 1.3475 - val_accuracy: 0.6567 - val_loss: 0.9434
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 755us/step - accuracy: 0.6494 - loss: 0.8819 - val_accuracy: 0.2559 - val_loss: 1.4165
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 755us/step - accuracy: 0.7220 - loss: 0.7122 - val_accuracy: 0.5511 - val_loss: 0.8864
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 802us/step - accuracy: 0.7736 - loss: 0.6171 - val_accuracy: 0.7805 - val_loss: 0.6199
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 806us/step - accuracy: 0.8046 - loss: 0.5555 - val_accuracy: 0.8216 - val_loss: 0.5331
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 776us/step - accuracy: 0.8193 - loss: 0.5164 - val_accuracy: 0.8257 - val_loss: 0.4972
Epoch 7/500
[1m107/107