In [3]:

import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.metrics import AUC, Precision, Recall
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, roc_auc_score, average_precision_score,
    matthews_corrcoef, hamming_loss, multilabel_confusion_matrix
)
import matplotlib.pyplot as plt
import seaborn as sns
import json
from tqdm import tqdm
import cv2

# Parameters
image_dir = "/Users/anatatar/Desktop/Licenta/ai_models/data/ecg_images_final"
csv_path = "/Users/anatatar/Desktop/Licenta/ai_models/data/labeled_ecg_images.csv"
target_classes = ['NORM', 'MI', 'STTC', 'CD', 'HYP']
image_size = (224, 224)
random_state = 42

os.makedirs("densenet", exist_ok=True)


In [4]:

df = pd.read_csv(csv_path)

def find_image_path(filename):
    for folder in os.listdir(image_dir):
        path = os.path.join(image_dir, folder, filename)
        if os.path.exists(path):
            return path
    return None

df['image_path'] = df['filename'].apply(find_image_path)
df = df[df['image_path'].notnull()]
print(f"✅ Total usable samples (multi-label): {len(df)}")

train_df, test_df = train_test_split(df, test_size=0.15, random_state=random_state)
train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=random_state)

def load_or_cache_images(df_subset, name):
    X_path = f"densenet/X_{name}.npy"
    y_path = f"densenet/y_{name}.npy"
    if os.path.exists(X_path) and os.path.exists(y_path):
        print(f"✅ Loading cached {name} data...")
        X = np.load(X_path)
        y = np.load(y_path)
    else:
        print(f"⏳ Caching {name} data...")
        X, y = [], []
        for _, row in tqdm(df_subset.iterrows(), total=len(df_subset), desc=f"Loading {name} images"):
            img = load_img(row['image_path'], color_mode='rgb', target_size=image_size)
            img_array = img_to_array(img) / 255.0
            label_vector = row[target_classes].values.astype(np.float32)
            X.append(img_array)
            y.append(label_vector)
        X, y = np.array(X), np.array(y)
        np.save(X_path, X)
        np.save(y_path, y)
    return X, y

X_train, y_train = load_or_cache_images(train_df, "train")
X_val, y_val = load_or_cache_images(val_df, "val")
X_test, y_test = load_or_cache_images(test_df, "test")


✅ Total usable samples (multi-label): 21799
⏳ Caching train data...


Loading train images: 100%|██████████| 15749/15749 [00:14<00:00, 1098.09it/s]


⏳ Caching val data...


Loading val images: 100%|██████████| 2780/2780 [00:02<00:00, 985.38it/s] 


⏳ Caching test data...


Loading test images: 100%|██████████| 3270/3270 [00:03<00:00, 929.31it/s] 


In [None]:

inputs = Input(shape=(224, 224, 3))
base_model = DenseNet121(include_top=False, weights='imagenet', input_tensor=inputs)
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.4)(x)
output = Dense(len(target_classes), activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=output)

for i, layer in enumerate(base_model.layers):
    layer.trainable = i >= len(base_model.layers) - 40

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=[AUC(curve='ROC', multi_label=True, name='auc'),
             Precision(name='precision'),
             Recall(name='recall'),
             'accuracy']
)

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=25,
                    batch_size=8,
                    callbacks=[early_stopping],
                    verbose=1)

model.save("densenet/densenet_model.h5")
with open("densenet/history.json", "w") as f:
    json.dump(history.history, f)


Epoch 1/25
[1m   1/1969[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:03:46[0m 6s/step - accuracy: 0.0000e+00 - auc: 0.2305 - loss: 1.2380 - precision: 0.0870 - recall: 0.2222

In [None]:

results = model.evaluate(X_test, y_test, verbose=1)
y_pred_probs = model.predict(X_test)
y_pred_bin = (y_pred_probs > 0.5).astype(int)

np.save("densenet/y_test.npy", y_test)
np.save("densenet/y_pred_bin.npy", y_pred_bin)
np.save("densenet/y_pred_probs.npy", y_pred_probs)

report = classification_report(y_test, y_pred_bin, target_names=target_classes, zero_division=0)
print(report)

cm = multilabel_confusion_matrix(y_test, y_pred_bin)

for i, name in enumerate(target_classes):
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm[i], annot=True, fmt='d', cmap='Blues',
                xticklabels=['Pred Neg', 'Pred Pos'],
                yticklabels=['Actual Neg', 'Actual Pos'])
    plt.title(f"Confusion Matrix - {name}")
    plt.tight_layout()
    plt.show()


In [None]:

plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Val Loss")
plt.legend()
plt.title("Loss Curve")

plt.subplot(1, 2, 2)
plt.plot(history.history["accuracy"], label="Train Acc")
plt.plot(history.history["val_accuracy"], label="Val Acc")
plt.legend()
plt.title("Accuracy Curve")
plt.tight_layout()
plt.show()


In [None]:

def generate_grad_cam(model, image_array, class_index, output_path):
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(index=-3).output, model.output]
    )
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(np.array([image_array]))
        loss = predictions[:, class_index]
    grads = tape.gradient(loss, conv_outputs)[0]
    conv_outputs = conv_outputs[0]
    weights = tf.reduce_mean(grads, axis=(0, 1))
    cam = np.zeros(conv_outputs.shape[0:2], dtype=np.float32)
    for i, w in enumerate(weights):
        cam += w * conv_outputs[:, :, i]
    cam = np.maximum(cam, 0)
    cam = cv2.resize(cam.numpy(), image_size)
    cam -= cam.min()
    cam /= cam.max()
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    img = np.uint8(255 * image_array)
    overlay = cv2.addWeighted(img, 0.6, heatmap, 0.4, 0)
    cv2.imwrite(output_path, overlay)
    print(f"✅ Grad-CAM saved to {output_path}")

for i, class_name in enumerate(target_classes):
    generate_grad_cam(model, X_test[0], class_index=i, output_path=f"densenet/gradcam_{class_name}_sample0.png")
