In [1]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load the Diagnostics.xlsx data
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"
diagnostics_df = pd.read_excel(diagnostics_file)

# Rename "SA" to "SI" in the "Rhythm" column so it matches the key used in
# merge_mapping below
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].replace("SA", "SI")

# Encode "Gender" column: 0 for "MALE" and 1 for "FEMALE"
# (Series.map yields NaN for any value that is not a key of the dict)
diagnostics_df["Gender"] = diagnostics_df["Gender"].map({"MALE": 0, "FEMALE": 1})

# Merge specified labels into four classes: AFIB, GSVT, SB and SR
merge_mapping = {
    "AF": "AFIB", "AFIB": "AFIB",
    "SVT": "GSVT", "AT": "GSVT", "SAAWR": "GSVT", "ST": "GSVT", "AVNRT": "GSVT", "AVRT": "GSVT",
    "SB": "SB",
    "SR": "SR", "SI": "SR"
}
diagnostics_df["Rhythm"] = diagnostics_df["Rhythm"].map(merge_mapping)

# Drop rows with any missing values.
# This runs AFTER both .map() calls on purpose: a Gender or Rhythm value that
# is absent from its mapping becomes NaN, and dropping only before the
# mappings would let those NaNs leak into the features / labels (a NaN label
# makes the later np.unique call fail on mixed str/float values).
diagnostics_df = diagnostics_df.dropna()

# Feature matrix: every column except the file identifier and the two
# diagnosis columns, cast to float32 in the same step.
features = (
    diagnostics_df
    .drop(columns=["FileName", "Rhythm", "Beat"])
    .values
    .astype("float32")
)

# Target vector: the (merged) "Rhythm" class of each row.
labels = diagnostics_df["Rhythm"].values

# Integer-encode the class names (this is NOT one-hot): each distinct label
# gets its position in the sorted array returned by np.unique.
unique_labels = np.unique(labels)
label_map = dict(zip(unique_labels, range(len(unique_labels))))
labels_encoded = np.array(list(map(label_map.__getitem__, labels)))

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)



In [2]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest classifier (4 trees, fixed seed for repeatability)
rf = RandomForestClassifier(random_state=42, n_estimators=4)

# Train the model
rf.fit(X_train, y_train)

# Predict on test data
y_pred = rf.predict(X_test)

# Class names ordered by their integer code, so they line up with the rows
# of the report
label_names = sorted(label_map, key=label_map.get)

# Evaluate and print classification report.
# The heading names the actual model (a random forest, not a single decision
# tree) and its size, so each report in the notebook is self-describing.
print(f"\nClassification Report (Random Forest, n_estimators={rf.n_estimators}):\n")
print(classification_report(y_test, y_pred, target_names=label_names, digits=5))


Classification Report (Decision Tree):

              precision    recall  f1-score   support

        AFIB    0.67012   0.76179   0.71302       424
        GSVT    0.85941   0.78631   0.82124       482
          SB    0.97318   0.98069   0.97692       777
          SR    0.90802   0.86130   0.88404       447

    accuracy                        0.86808      2130
   macro avg    0.85268   0.84752   0.84881      2130
weighted avg    0.87343   0.86808   0.86967      2130



In [3]:
# RandomForestClassifier has no get_depth() / get_n_leaves(); those methods
# belong to the individual fitted trees exposed through rf.estimators_, so
# the statistics are aggregated over the trees of the forest.
tree_depths = [tree.get_depth() for tree in rf.estimators_]
tree_leaf_counts = [tree.get_n_leaves() for tree in rf.estimators_]

print(max(tree_depths))       # depth of the deepest tree in the forest
print(sum(tree_leaf_counts))  # total number of leaves across all trees

24
689


In [3]:
# Initialize the Random Forest classifier (20 trees, fixed seed for repeatability)
rf = RandomForestClassifier(random_state=42, n_estimators=20)

# Train the model
rf.fit(X_train, y_train)

# Predict on test data
y_pred = rf.predict(X_test)

# Class names ordered by their integer code, so they line up with the rows
# of the report
label_names = sorted(label_map, key=label_map.get)

# Evaluate and print classification report.
# The heading names the actual model (a random forest, not a single decision
# tree) and its size, so each report in the notebook is self-describing.
print(f"\nClassification Report (Random Forest, n_estimators={rf.n_estimators}):\n")
print(classification_report(y_test, y_pred, target_names=label_names, digits=5))


Classification Report (Decision Tree):

              precision    recall  f1-score   support

        AFIB    0.76271   0.74292   0.75269       424
        GSVT    0.85921   0.86100   0.86010       482
          SB    0.97592   0.99099   0.98340       777
          SR    0.91011   0.90604   0.90807       447

    accuracy                        0.89437      2130
   macro avg    0.87699   0.87524   0.87607      2130
weighted avg    0.89326   0.89437   0.89376      2130



In [4]:
# Initialize the Random Forest classifier (30 trees, fixed seed for repeatability)
rf = RandomForestClassifier(random_state=42, n_estimators=30)

# Train the model
rf.fit(X_train, y_train)

# Predict on test data
y_pred = rf.predict(X_test)

# Class names ordered by their integer code, so they line up with the rows
# of the report
label_names = sorted(label_map, key=label_map.get)

# Evaluate and print classification report.
# The heading names the actual model (a random forest, not a single decision
# tree) and its size, so each report in the notebook is self-describing.
print(f"\nClassification Report (Random Forest, n_estimators={rf.n_estimators}):\n")
print(classification_report(y_test, y_pred, target_names=label_names, digits=5))


Classification Report (Decision Tree):

              precision    recall  f1-score   support

        AFIB    0.77396   0.74292   0.75812       424
        GSVT    0.85336   0.86929   0.86125       482
          SB    0.97716   0.99099   0.98403       777
          SR    0.91667   0.91051   0.91358       447

    accuracy                        0.89718      2130
   macro avg    0.88029   0.87843   0.87925      2130
weighted avg    0.89600   0.89718   0.89649      2130



In [5]:
# Initialize the Random Forest classifier (50 trees, fixed seed for repeatability)
rf = RandomForestClassifier(random_state=42, n_estimators=50)

# Train the model
rf.fit(X_train, y_train)

# Predict on test data
y_pred = rf.predict(X_test)

# Class names ordered by their integer code, so they line up with the rows
# of the report
label_names = sorted(label_map, key=label_map.get)

# Evaluate and print classification report.
# The heading names the actual model (a random forest, not a single decision
# tree) and its size, so each report in the notebook is self-describing.
print(f"\nClassification Report (Random Forest, n_estimators={rf.n_estimators}):\n")
print(classification_report(y_test, y_pred, target_names=label_names, digits=5))


Classification Report (Decision Tree):

              precision    recall  f1-score   support

        AFIB    0.79798   0.74528   0.77073       424
        GSVT    0.86061   0.88382   0.87206       482
          SB    0.97716   0.99099   0.98403       777
          SR    0.91353   0.92170   0.91759       447

    accuracy                        0.90329      2130
   macro avg    0.88732   0.88545   0.88610      2130
weighted avg    0.90176   0.90329   0.90229      2130



In [6]:
# Initialize the Random Forest classifier (100 trees, fixed seed for repeatability)
rf = RandomForestClassifier(random_state=42, n_estimators=100)

# Train the model
rf.fit(X_train, y_train)

# Predict on test data
y_pred = rf.predict(X_test)

# Class names ordered by their integer code, so they line up with the rows
# of the report
label_names = sorted(label_map, key=label_map.get)

# Evaluate and print classification report.
# The heading names the actual model (a random forest, not a single decision
# tree) and its size, so each report in the notebook is self-describing.
print(f"\nClassification Report (Random Forest, n_estimators={rf.n_estimators}):\n")
print(classification_report(y_test, y_pred, target_names=label_names, digits=5))


Classification Report (Decision Tree):

              precision    recall  f1-score   support

        AFIB    0.80362   0.73349   0.76695       424
        GSVT    0.85200   0.88382   0.86762       482
          SB    0.97716   0.99099   0.98403       777
          SR    0.91429   0.93065   0.92239       447

    accuracy                        0.90282      2130
   macro avg    0.88677   0.88474   0.88525      2130
weighted avg    0.90110   0.90282   0.90154      2130

