In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping

# Load the Diagnostics.xlsx data
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"
diagnostics_df = pd.read_excel(diagnostics_file)

# Rename "SA" to "SI" in the "Rhythm" column
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].replace("SA", "SI")

# Drop rows with any missing values
diagnostics_df = diagnostics_df.dropna()

# Encode "Gender" column: 0 for "MALE" and 1 for "FEMALE"
diagnostics_df["Gender"] = diagnostics_df["Gender"].map({"MALE": 0, "FEMALE": 1})

# Merge specified labels
merge_mapping = {
    "AF": "AFIB",
    "AFIB": "AFIB",
    "SVT": "GSVT",
    "AT": "GSVT",
    "SAAWR": "GSVT",
    "ST": "GSVT",
    "AVNRT": "GSVT",
    "AVRT": "GSVT",
    "SB": "SB",
    "SR": "SR",
    "SI": "SR"
}
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].map(merge_mapping)

# Separate features and labels
features = diagnostics_df.drop(columns=["FileName", "Rhythm", "Beat"]).values
labels = diagnostics_df["Rhythm"].values  # Using "Rhythm" as the target variable

# Convert features to float32
features = features.astype("float32")

# Encode labels as one-hot with merged classes
unique_labels = np.unique(labels)
label_map = {label: index for index, label in enumerate(unique_labels)}
labels_encoded = to_categorical([label_map[label] for label in labels])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

2024-11-28 12:17:47.771199: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-28 12:17:47.782043: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-28 12:17:47.785335: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-28 12:17:47.795166: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(8516, 13) (8516, 4)
(2130, 13) (2130, 4)


In [3]:
mlp = Sequential([
    Dense(32, activation="relu", input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dense(16, activation="relu"),
    BatchNormalization(),
    Dense(labels_encoded.shape[1], activation="softmax")
])

mlp.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

mlp.fit(X_train, y_train, epochs=500, batch_size=64, validation_split=0.2,
        # callbacks=[early_stopping]
        )

# Evaluate the model
y_pred = mlp.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report
label_names = [label for label, index in sorted(label_map.items(), key=lambda item: item[1])]
print("\nClassification Report:\n")
print(classification_report(y_test_classes, y_pred_classes, target_names=label_names, digits=5))


Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1732774670.267986  711110 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732774670.302519  711110 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732774670.306645  711110 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732774670.31184

[1m103/107[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 988us/step - accuracy: 0.4824 - loss: 1.2795

I0000 00:00:1732774672.055176  711255 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.4885 - loss: 1.2661 - val_accuracy: 0.4824 - val_loss: 1.2227
Epoch 2/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7800 - loss: 0.5932 - val_accuracy: 0.6896 - val_loss: 0.7489
Epoch 3/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 789us/step - accuracy: 0.8228 - loss: 0.4882 - val_accuracy: 0.7377 - val_loss: 0.6595
Epoch 4/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 740us/step - accuracy: 0.8331 - loss: 0.4571 - val_accuracy: 0.7218 - val_loss: 0.6862
Epoch 5/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 735us/step - accuracy: 0.8471 - loss: 0.4193 - val_accuracy: 0.8175 - val_loss: 0.5198
Epoch 6/500
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 782us/step - accuracy: 0.8383 - loss: 0.4129 - val_accuracy: 0.8281 - val_loss: 0.4864
Epoch 7/500
[1m107/107[

In [4]:
# Reshape the data for CNN input: we will use 2D convolution, so we need to reshape (samples, features) into (samples, time_steps, channels)
# Here, each ECG feature (e.g., Ventricular Rate, Atrial Rate) will be treated as a feature in the 2D matrix, and time is along one axis.
X_train_reshaped = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)  # (samples, features, 1, 1)
X_test_reshaped = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)  # (samples, features, 1, 1)

# Define the CNN+MLP hybrid model with 2D convolution
model = Sequential([
    # 2D Convolutional layer for feature extraction
    Conv2D(64, (3, 1), activation='relu', input_shape=(X_train_reshaped.shape[1], 1, 1)),
    MaxPooling2D((2, 1)),
    Dropout(0.3),

    # Flatten the output from CNN layer and pass it to dense layers
    Flatten(),

    # MLP layers for classification
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dense(labels_encoded.shape[1], activation="softmax")  # Output layer with the number of merged classes
])

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Set up early stopping
# early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Train the model
model.fit(X_train_reshaped, y_train, validation_data=(X_test_reshaped, y_test), epochs=500, batch_size=64,
          # callbacks=[early_stopping]
          )

# Evaluate the model
y_pred = model.predict(X_test_reshaped)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report
label_names = [label for label, index in sorted(label_map.items(), key=lambda item: item[1])]
print("\nClassification Report:\n")
print(classification_report(y_test_classes, y_pred_classes, target_names=label_names, digits=5))

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.3450 - loss: 10.7590 - val_accuracy: 0.4211 - val_loss: 1.1262
Epoch 2/500
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 744us/step - accuracy: 0.4654 - loss: 1.4501 - val_accuracy: 0.6408 - val_loss: 0.9207
Epoch 3/500
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 904us/step - accuracy: 0.5293 - loss: 1.0528 - val_accuracy: 0.6948 - val_loss: 0.7604
Epoch 4/500
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 824us/step - accuracy: 0.5955 - loss: 0.9283 - val_accuracy: 0.7333 - val_loss: 0.6972
Epoch 5/500
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 778us/step - accuracy: 0.6204 - loss: 0.8597 - val_accuracy: 0.7446 - val_loss: 0.6895
Epoch 6/500
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 742us/step - accuracy: 0.6679 - loss: 0.7697 - val_accuracy: 0.7901 - val_loss: 0.6609
Epoch 7/500
[1m134/13

In [5]:
from sklearn.tree import DecisionTreeClassifier

# Decision Tree
dt = DecisionTreeClassifier(random_state=43)
dt.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_dt = dt.predict(X_test.reshape(X_test.shape[0], -1))
print("Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_dt, digits=5, target_names=label_names))
print("Depth of the tree", dt.get_depth())
print("Leaf nodes of the tree", dt.get_n_leaves())

Decision Tree Classification Report:
              precision    recall  f1-score   support

        AFIB    0.68613   0.66509   0.67545       424
        GSVT    0.79878   0.81535   0.80698       482
          SB    0.97570   0.98198   0.97883       777
          SR    0.87865   0.87472   0.87668       447

   micro avg    0.85869   0.85869   0.85869      2130
   macro avg    0.83482   0.83429   0.83449      2130
weighted avg    0.85766   0.85869   0.85812      2130
 samples avg    0.85869   0.85869   0.85869      2130

Depth of the tree 24
Leaf nodes of the tree 688
