In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf

# constant seed
tf.random.set_seed(6950)


# Load the Diagnostics.xlsx data
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"
diagnostics_df = pd.read_excel(diagnostics_file)

# Rename "SA" to "SI" in the "Rhythm" column
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].replace("SA", "SI")

# Drop rows with any missing values
diagnostics_df = diagnostics_df.dropna()

# Encode "Gender" column: 0 for "MALE" and 1 for "FEMALE"
diagnostics_df["Gender"] = diagnostics_df["Gender"].map({"MALE": 0, "FEMALE": 1})

# Merge specified labels
merge_mapping = {
    "AF": "AFIB",
    "AFIB": "AFIB",
    "SVT": "GSVT",
    "AT": "GSVT",
    "SAAWR": "GSVT",
    "ST": "GSVT",
    "AVNRT": "GSVT",
    "AVRT": "GSVT",
    "SB": "SB",
    "SR": "SR",
    "SI": "SR"
}
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].map(merge_mapping)

# Separate features and labels
features = diagnostics_df.drop(columns=["FileName", "Rhythm", "Beat"]).values
labels = diagnostics_df["Rhythm"].values  # Using "Rhythm" as the target variable

# Convert features to float32
features = features.astype("float32")

# Encode labels as one-hot with merged classes
unique_labels = np.unique(labels)
label_map = {label: index for index, label in enumerate(unique_labels)}
labels_encoded = to_categorical([label_map[label] for label in labels])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

2025-01-10 21:40:20.373026: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-10 21:40:20.383726: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-10 21:40:20.387021: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-10 21:40:20.395883: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(8516, 13) (8516, 4)
(2130, 13) (2130, 4)


In [3]:
initial_trained_mlp = Sequential([
    Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dense(labels_encoded.shape[1], activation="softmax")
])

initial_trained_mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

initial_trained_mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2,
                        # callbacks=[early_stopping]
                        )

# Evaluate the model
y_pred = initial_trained_mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report
label_names = [label for label, index in sorted(label_map.items(), key=lambda item: item[1])]
print("\nClassification Report:\n")
print(classification_report(y_test_classes, y_pred_classes, target_names=label_names, digits=5))


Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1736523622.821005   73717 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1736523622.853909   73717 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1736523622.860613   73717 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1736523622.86564

[1m 86/107[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 1ms/step - accuracy: 0.6811 - loss: 0.8374

I0000 00:00:1736523625.145030   73864 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.7015 - loss: 0.7873 - val_accuracy: 0.5745 - val_loss: 1.2578
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8322 - loss: 0.4335 - val_accuracy: 0.8011 - val_loss: 0.5385
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 932us/step - accuracy: 0.8424 - loss: 0.4102 - val_accuracy: 0.8498 - val_loss: 0.4130
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 899us/step - accuracy: 0.8502 - loss: 0.3863 - val_accuracy: 0.8656 - val_loss: 0.3516
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8581 - loss: 0.3715 - val_accuracy: 0.8439 - val_loss: 0.3957
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8592 - loss: 0.3593 - val_accuracy: 0.8521 - val_loss: 0.3989
Epoch 7/500
[1m107/107[0m 

In [4]:
initial_trained_mlp.summary()

In [5]:
optimized_mlp = Sequential([
    Dense(32, activation="relu", input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dense(16, activation="relu"),
    Dense(labels_encoded.shape[1], activation="softmax")
])

optimized_mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

optimized_mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2,
                  # callbacks=[early_stopping]
                  )

# Evaluate the model
y_pred = optimized_mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report
label_names = [label for label, index in sorted(label_map.items(), key=lambda item: item[1])]
print("\nClassification Report:\n")
print(classification_report(y_test_classes, y_pred_classes, target_names=label_names, digits=5))


Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.4346 - loss: 1.3019 - val_accuracy: 0.4853 - val_loss: 1.0219
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 965us/step - accuracy: 0.7372 - loss: 0.7109 - val_accuracy: 0.7418 - val_loss: 0.6748
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 737us/step - accuracy: 0.8168 - loss: 0.5272 - val_accuracy: 0.7887 - val_loss: 0.5805
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 741us/step - accuracy: 0.8354 - loss: 0.4602 - val_accuracy: 0.8198 - val_loss: 0.5123
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 741us/step - accuracy: 0.8434 - loss: 0.4349 - val_accuracy: 0.8504 - val_loss: 0.4151
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 732us/step - accuracy: 0.8444 - loss: 0.4241 - val_accuracy: 0.8410 - val_loss: 0.4210
Epoch 7/500
[1m107/107

In [6]:
optimized_mlp.summary()