In [1]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load the Diagnostics.xlsx data
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"

# Merge the fine-grained rhythm codes into four super-classes.
# "SA" is first renamed to "SI", which the mapping then folds into "SR".
merge_mapping = {
    "AF": "AFIB", "AFIB": "AFIB",
    "SVT": "GSVT", "AT": "GSVT", "SAAWR": "GSVT", "ST": "GSVT", "AVNRT": "GSVT", "AVRT": "GSVT",
    "SB": "SB",
    "SR": "SR", "SI": "SR"
}

# Recode first, drop missing rows last. Series.map() turns every value that is
# absent from its mapping into NaN, so running dropna() afterwards guarantees
# that neither the feature matrix nor the labels carry NaN into training.
# Rows that were already incomplete in the spreadsheet are removed as before.
diagnostics_df = (
    pd.read_excel(diagnostics_file)
    .assign(
        Rhythm=lambda df: df["Rhythm"].replace("SA", "SI").map(merge_mapping),
        # Encode "Gender": 0 for "MALE" and 1 for "FEMALE"
        Gender=lambda df: df["Gender"].map({"MALE": 0, "FEMALE": 1}),
    )
    .dropna()
)

# Separate features and labels; every remaining column is used as a feature
# and converted to float32.
features = (
    diagnostics_df
    .drop(columns=["FileName", "Rhythm", "Beat"])
    .to_numpy(dtype="float32")
)
labels = diagnostics_df["Rhythm"].to_numpy()  # Using "Rhythm" as the target variable

# Encode the merged class names as integer ids (not one-hot): the ids follow
# the sorted order of the class names, and label_map goes from name -> id.
unique_labels, labels_encoded = np.unique(labels, return_inverse=True)
label_map = {label: index for index, label in enumerate(unique_labels)}

# Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

In [2]:
# Inspect one held-out feature vector (the test row at index 1)
second_test_row = X_test[1]
print(second_test_row)

[ 68.   0.  67. 250.  76. 482. 509.  75.  82.  11. 219. 257. 460.]


In [3]:
from sklearn.tree import DecisionTreeClassifier

# Initialize the Decision Tree Classifier with capped depth and leaf count
# (presumably to keep the tree small enough for the later C export -- confirm).
dt = DecisionTreeClassifier(random_state=42, max_depth=10, max_leaf_nodes=50)

# Train the model
dt.fit(X_train, y_train)

# Predict on test data
y_pred = dt.predict(X_test)

# Class names ordered by their integer id, plus the matching ids
label_names = sorted(label_map, key=label_map.get)
class_ids = [label_map[name] for name in label_names]

# Evaluate and print classification report.
# `labels` is passed explicitly so target_names always lines up with the class
# ids; without it, a class missing from both y_test and y_pred makes
# classification_report reject target_names for having the wrong length.
print("\nClassification Report (Decision Tree):\n")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=label_names, digits=5))


Classification Report (Decision Tree):

              precision    recall  f1-score   support

        AFIB    0.81096   0.69811   0.75032       424
        GSVT    0.83826   0.88174   0.85945       482
          SB    0.97959   0.98842   0.98398       777
          SR    0.90084   0.95526   0.92725       447

    accuracy                        0.89953      2130
   macro avg    0.88241   0.88088   0.88025      2130
weighted avg    0.89752   0.89953   0.89738      2130



In [4]:
# Report the fitted tree's size: depth on the first line, leaf count on the second
tree_depth = dt.get_depth()
leaf_count = dt.get_n_leaves()
print(tree_depth, leaf_count, sep="\n")

10
50


In [5]:
# Generate a C/C++ header file from the trained decision tree
import os

from micromlgen import port

converted_c_code = port(dt)

# open(..., "w") does not create missing parent directories, so make sure the
# output folder exists before writing.
header_path = "MicroGencode/optimized_author_provided_feat_dt_v1.h"
os.makedirs(os.path.dirname(header_path), exist_ok=True)

# Save the generated code; the encoding is fixed so the file does not depend
# on the platform's default locale.
with open(header_path, "w", encoding="utf-8") as modelFile:
    modelFile.write(converted_c_code)

In [6]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Create Plots directory if it doesn't exist
os.makedirs('Plots', exist_ok=True)

# Calculate confusion matrix.
# The class ids are pinned explicitly so the matrix always has one row/column
# per entry of label_names, even if a class is absent from y_test and y_pred.
class_ids = np.arange(len(label_names))
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Create figure and axes with transparent backgrounds
fig, ax = plt.subplots(figsize=(10, 8), facecolor='none')
ax.set_facecolor('none')

# Create heatmap using seaborn, drawn on the explicit axes
sns.heatmap(cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=label_names,
            yticklabels=label_names,
            square=True,  # Make cells square
            cbar_kws={'label': 'Number of Samples'},
            ax=ax)

# Customize the plot
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold', labelpad=10)
ax.set_ylabel('True Label', fontsize=12, fontweight='bold', labelpad=10)
ax.set_title('Confusion Matrix - Decision Tree Classification\n'
             f'(Depth: {dt.get_depth()}, Leaves: {dt.get_n_leaves()})',
             fontsize=14,
             fontweight='bold',
             pad=20)

# Rotate x-axis labels for better readability
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Adjust layout to prevent label cutoff
fig.tight_layout()

# Save the plot with transparency; the timestamp keeps earlier plots from
# being overwritten on re-runs.
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
filename = f'Plots/dt_confusion_matrix_{timestamp}.png'
fig.savefig(filename,
            dpi=300,
            bbox_inches='tight',
            transparent=True)
plt.close(fig)

# Row-normalize the confusion matrix (each row = one true class).
# Rows with no true samples are left at 0 instead of dividing by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm,
                          row_totals,
                          out=np.zeros(cm.shape, dtype=float),
                          where=row_totals != 0)

# Print normalized confusion matrix (as percentages)
print("\nNormalized Confusion Matrix (%):")
for class_name, row_percent in zip(label_names, cm_normalized * 100):
    print(f"\n{class_name}: " + " ".join(f"{x:6.2f}" for x in row_percent))

# Calculate and print the overall accuracy (correct predictions / all predictions)
accuracy = np.trace(cm) / np.sum(cm)
print(f"\nOverall Accuracy: {accuracy:.4f}")


Normalized Confusion Matrix (%):

AFIB:  69.81  19.10   1.65   9.43

GSVT:   9.75  88.17   0.83   1.24

SB:   1.03   0.00  98.84   0.13

SR:   3.13   0.22   1.12  95.53

Overall Accuracy: 0.8995
