In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping

# Load the Diagnostics.xlsx data
diagnostics_file = "../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"
diagnostics_df = pd.read_excel(diagnostics_file)

# Rename "SA" to "SI" in the "Rhythm" column
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].replace("SA", "SI")

# Drop rows with any missing values
diagnostics_df = diagnostics_df.dropna()

# Encode "Gender" column: 0 for "MALE" and 1 for "FEMALE"
diagnostics_df["Gender"] = diagnostics_df["Gender"].map({"MALE": 0, "FEMALE": 1})

# Merge specified labels
merge_mapping = {
    "AF": "AFIB", "AFIB": "AFIB",
    "SVT": "GSVT", "AT": "GSVT", "SAAWR": "GSVT", "ST": "GSVT", "AVNRT": "GSVT", "AVRT": "GSVT",
    "SB": "SB",
    "SR": "SR", "SI": "SR"
}
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].map(merge_mapping)

# Separate features and labels
features = diagnostics_df.drop(columns=["FileName", "Rhythm", "Beat"]).values
labels = diagnostics_df["Rhythm"].values  # Using "Rhythm" as the target variable

# Convert features to float32
features = features.astype("float32")

# Encode labels as one-hot with merged classes
unique_labels = np.unique(labels)
label_map = {label: index for index, label in enumerate(unique_labels)}
labels_encoded = to_categorical([label_map[label] for label in labels])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Reshape the data for CNN input: we will use 2D convolution, so we need to reshape (samples, features) into (samples, time_steps, channels)
# Here, each ECG feature (e.g., Ventricular Rate, Atrial Rate) will be treated as a feature in the 2D matrix, and time is along one axis.
X_train_reshaped = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)  # (samples, features, 1, 1)
X_test_reshaped = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)  # (samples, features, 1, 1)

# Define the CNN+MLP hybrid model with 2D convolution
model = Sequential([
    # 2D Convolutional layer for feature extraction
    Conv2D(64, (3, 1), activation='relu', input_shape=(X_train_reshaped.shape[1], 1, 1)),
    MaxPooling2D((2, 1)),
    Dropout(0.3),

    # Flatten the output from CNN layer and pass it to dense layers
    Flatten(),

    # MLP layers for classification
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dense(labels_encoded.shape[1], activation="softmax")  # Output layer with the number of merged classes
])

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Set up early stopping
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Train the model
history = model.fit(X_train_reshaped, y_train, validation_data=(X_test_reshaped, y_test), epochs=20, batch_size=32, callbacks=[early_stopping])

# Evaluate the model
y_pred = model.predict(X_test_reshaped)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Map back to original labels for a readable report
label_names = [label for label, index in sorted(label_map.items(), key=lambda item: item[1])]
print("\nClassification Report:\n")
print(classification_report(y_test_classes, y_pred_classes, target_names=label_names))

2024-11-08 22:46:18.175980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-08 22:46:18.187066: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-08 22:46:18.190496: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-08 22:46:18.199376: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/20


I0000 00:00:1731084381.323699  339633 service.cc:146] XLA service 0x77853c003df0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731084381.323725  339633 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2024-11-08 22:46:21.342199: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-11-08 22:46:21.444389: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m142/267[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 1ms/step - accuracy: 0.3815 - loss: 13.0968

I0000 00:00:1731084382.749041  339633 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.4017 - loss: 9.2635 - val_accuracy: 0.5347 - val_loss: 1.0285
Epoch 2/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4986 - loss: 1.1337 - val_accuracy: 0.6545 - val_loss: 0.8393
Epoch 3/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 715us/step - accuracy: 0.5698 - loss: 0.9458 - val_accuracy: 0.6944 - val_loss: 0.7716
Epoch 4/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 754us/step - accuracy: 0.6256 - loss: 0.8546 - val_accuracy: 0.7150 - val_loss: 0.6879
Epoch 5/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 773us/step - accuracy: 0.6645 - loss: 0.7767 - val_accuracy: 0.7840 - val_loss: 0.5925
Epoch 6/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 687us/step - accuracy: 0.6882 - loss: 0.7401 - val_accuracy: 0.8146 - val_loss: 0.6125
Epoch 7/20
[1m267/267[0m [3