<a href="https://colab.research.google.com/github/jasminjahanpuspo/Attention_Mechanism/blob/main/2_ResNet50_A_M.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**<font color='yellow'>Model Name: Resnet50_Attention_Mechanism</font>**


# Step 1: Setup Environment

##### 🚀 Mount Google Drive
*   Access datasets stored in your Google Drive.  
*   After running this cell, you'll be prompted to authorize access.


In [None]:
# Mount the user's Google Drive into the Colab runtime.
# The dataset and checkpoint paths used later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

##### 🛠 Import Required Libraries
*   Load all necessary libraries for image processing, data handling, and visualization.
*   Load TensorFlow, Keras, and layers for building CNN models.

In [None]:
# -------------------- Libraries for Data Handling -------------------- #
import numpy as np              # Numerical operations and arrays
import pandas as pd             # Data manipulation and analysis
import os                       # File and directory operations
import glob as gb               # File pattern matching (e.g., get all image paths)

# -------------------- Libraries for Image Processing ---------------- #
import cv2                      # OpenCV for image reading, processing, and augmentation

# -------------------- Libraries for Visualization ------------------ #
import matplotlib.pyplot as plt # Plotting graphs and images
import seaborn as sns           # Advanced visualizations (heatmaps, pairplots)
%matplotlib inline
import matplotlib

from PIL import Image
import random
import math


In [None]:
# -------------------- TensorFlow & Keras -------------------- #
import tensorflow as tf                       # Core TensorFlow library
from tensorflow import keras                  # High-level API for building neural networks
from tensorflow.keras.preprocessing import image

# -------------------- Dataset Utilities -------------------- #
from tensorflow.keras.preprocessing import image_dataset_from_directory
# Load images from directories into TensorFlow datasets

from tensorflow.keras import layers, models
# -------------------- Layers for CNN Models ---------------- #
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPool2D, LeakyReLU

# Step 2: Prepare Dataset

##### 🔹 Create TensorFlow Datasets


*   Load images from the directories into TensorFlow datasets for training, validation, and testing.  
*   You can adjust `image_size` and `batch_size` as needed.



In [None]:
# Locations of the three dataset splits (train / test / validation) on the mounted Drive
train_directory, test_directory, valid_directory = (
    '/content/drive/MyDrive/sample_dataset/train',
    '/content/drive/MyDrive/sample_dataset/test',
    '/content/drive/MyDrive/sample_dataset/valid',
)

In [None]:
# Target image resolution passed to the dataset loaders; (224, 224) matches the
# image_shape=(224,224,3) used when the model is built below.
IMG_SIZE = (224, 224)  # alternative resolution: (299, 299)
# Number of images per batch for all three dataset splits.
BATCH_SIZE = 128       # varies from dataset to dataset; commonly 128, 64 or 32

In [None]:
# Build the train / test / validation tf.data pipelines from the class-per-folder
# directory layout. All three splits share the same loader options; customize
# them here as your dataset requires.
# NOTE(review): shuffle=True is also applied to the test/validation splits, so
# separate passes over them may yield samples in a different order — confirm
# this is intended before pairing labels and predictions from different passes.
_loader_options = dict(
    shuffle=True,
    labels='inferred',
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    color_mode='rgb',
    seed=42,
)

train_dataset = image_dataset_from_directory(train_directory, **_loader_options)

test_dataset = image_dataset_from_directory(test_directory, **_loader_options)

valid_dataset = image_dataset_from_directory(valid_directory, **_loader_options)

In [None]:
# Class labels are taken from the training dataset (one per sub-directory);
# their count determines the size of the classifier head.
class_names = train_dataset.class_names
n_classes = len(class_names)

for caption, detected in (("Detected Classes:", class_names),
                          ("Number of Classes:", n_classes)):
    print(caption, detected)

In [None]:
# Count the training images of every class in ONE pass over the dataset.
# (The previous version iterated the complete dataset once per class, i.e.
# n_classes full passes including image decoding.)
_per_class = np.zeros(len(train_dataset.class_names), dtype=np.int64)
for _, batch_labels in train_dataset:
    # Labels are integer class indices; bincount tallies them per batch.
    _batch_ids = batch_labels.numpy().astype(np.int64).ravel()
    _per_class += np.bincount(_batch_ids, minlength=_per_class.size)
class_counts = _per_class.tolist()  # plain Python ints, one entry per class

Dataset = class_counts  # per-class sample counts, consumed by the class-weight cell

#**<font color='red'>ResNet50 + CBAM (Convolutional Block Attention Module)</font>**


## Step 3.1: Model Training

##### 🔗 CNN with CBAM
Add **CBAM (Convolutional Block Attention Module)** on top of a CNN backbone to enhance feature representation with **channel and spatial attention**.


In [None]:
# -----------------------------
# CBAM Implementation in Keras
# -----------------------------
class ChannelAttention(layers.Layer):
    """Channel-attention branch of CBAM.

    Global average- and max-pooled descriptors are passed through a shared
    two-layer MLP (fc1 -> fc2), summed, squashed with a sigmoid and used to
    rescale every channel of the input feature map.

    Args:
        channels: Number of channels of the incoming feature map.
        ratio: Reduction ratio of the bottleneck layer (fc1 has
            ``channels // ratio`` units, at least 1).
        **kwargs: Standard Keras layer arguments (e.g. ``name``).
    """

    def __init__(self, channels, ratio=8, **kwargs):
        super(ChannelAttention, self).__init__(**kwargs)
        # Kept so the layer can be serialized / re-created via get_config()
        self.channels = channels
        self.ratio = ratio

        self.avg_pool = layers.GlobalAveragePooling2D()
        self.max_pool = layers.GlobalMaxPooling2D()

        # max(..., 1) avoids a zero-unit Dense layer when channels < ratio
        self.fc1 = layers.Dense(max(channels // ratio, 1), activation="relu", kernel_initializer='he_normal', use_bias=True)
        self.fc2 = layers.Dense(channels, kernel_initializer='he_normal', use_bias=True)

    def call(self, inputs):
        """Return ``inputs`` rescaled per channel by the attention weights."""
        # The same MLP (shared weights) processes both pooled descriptors
        avg_out = self.fc2(self.fc1(self.avg_pool(inputs)))
        max_out = self.fc2(self.fc1(self.max_pool(inputs)))
        out = avg_out + max_out
        out = tf.nn.sigmoid(out)
        # Reshape to (batch, 1, 1, channels) so it broadcasts over the spatial axes
        out = tf.reshape(out, [-1, 1, 1, inputs.shape[-1]])
        return inputs * out

    def get_config(self):
        """Return the constructor arguments so a saved model can be reloaded."""
        config = super(ChannelAttention, self).get_config()
        config.update({"channels": self.channels, "ratio": self.ratio})
        return config


class SpatialAttention(layers.Layer):
    """Spatial-attention branch of CBAM.

    The channel-wise mean and max maps are concatenated and convolved into a
    single-channel sigmoid mask that rescales every spatial position.

    Args:
        kernel_size: Size of the convolution kernel producing the mask.
        **kwargs: Standard Keras layer arguments (e.g. ``name``).
    """

    def __init__(self, kernel_size=7, **kwargs):
        super(SpatialAttention, self).__init__(**kwargs)
        # Kept so the layer can be serialized / re-created via get_config()
        self.kernel_size = kernel_size
        self.conv1 = layers.Conv2D(1, kernel_size, padding="same", activation="sigmoid", kernel_initializer='he_normal')

    def call(self, inputs):
        """Return ``inputs`` rescaled per position by the attention mask."""
        # axis=3 is the channel axis; keepdims keeps a trailing axis of size 1
        avg_out = tf.reduce_mean(inputs, axis=3, keepdims=True)
        max_out = tf.reduce_max(inputs, axis=3, keepdims=True)
        concat = tf.concat([avg_out, max_out], axis=3)
        return inputs * self.conv1(concat)

    def get_config(self):
        """Return the constructor arguments so a saved model can be reloaded."""
        config = super(SpatialAttention, self).get_config()
        config.update({"kernel_size": self.kernel_size})
        return config


class CBAM(layers.Layer):
    """Convolutional Block Attention Module: channel attention followed by spatial attention.

    Args:
        channels: Number of channels of the incoming feature map.
        ratio: Reduction ratio forwarded to the channel-attention branch.
        kernel_size: Convolution kernel size forwarded to the spatial-attention branch.
        **kwargs: Standard Keras layer arguments (e.g. ``name``).
    """

    def __init__(self, channels, ratio=8, kernel_size=7, **kwargs):
        super(CBAM, self).__init__(**kwargs)  # Accept kwargs like name
        # Kept so the layer can be serialized / re-created via get_config()
        self.channels = channels
        self.ratio = ratio
        self.kernel_size = kernel_size
        self.channel_attention = ChannelAttention(channels, ratio)
        self.spatial_attention = SpatialAttention(kernel_size)

    def call(self, inputs):
        """Apply channel attention, then spatial attention, to ``inputs``."""
        x = self.channel_attention(inputs)
        x = self.spatial_attention(x)
        return x

    def get_config(self):
        """Return the constructor arguments so a saved checkpoint can be reloaded."""
        config = super(CBAM, self).get_config()
        config.update({
            "channels": self.channels,
            "ratio": self.ratio,
            "kernel_size": self.kernel_size,
        })
        return config



# -----------------------------
# Modified Model with CBAM
# -----------------------------
def create_model(image_shape=(224,224,3), num_classes=n_classes, freeze_until=291):
    """Build a ResNet50 backbone with a CBAM block and a softmax classification head.

    Args:
        image_shape: Input shape (height, width, channels) of the images.
        num_classes: Number of output classes of the softmax layer.
        freeze_until: Backbone layers ``[0:freeze_until]`` are frozen; the
            remaining ones stay trainable. The default keeps the previously
            hard-coded value.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping raw RGB images to class
        probabilities. ResNet50 preprocessing is part of the model, so
        callers must feed un-normalized pixel values.
    """
    base_model = tf.keras.applications.ResNet50(input_shape=image_shape,
                                                   include_top=False,
                                                   weights="imagenet")
    base_model.trainable = True
    # NOTE(review): 291 looks inherited from a deeper backbone. If ResNet50
    # has fewer layers than that (check len(base_model.layers)), this slice
    # freezes the entire backbone — lower freeze_until to fine-tune top blocks.
    for layer in base_model.layers[0:freeze_until]:
        layer.trainable = False

    inputs = tf.keras.Input(shape=image_shape)
    x = tf.keras.applications.resnet50.preprocess_input(inputs)
    # training=False keeps the backbone in inference mode
    x = base_model(x, training=False)   # e.g. (None, 7, 7, 2048) for 224x224 inputs

    # Add CBAM with fixed name; the channel count is read from the backbone
    # output instead of being hard-coded.
    x = CBAM(channels=base_model.output_shape[-1], name="cbam")(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = tf.keras.Model(inputs, outputs)
    return model

##### 🔹 Build and Compile Model

In [None]:
# -----------------------------
# Build and Compile
# -----------------------------
# Learning rate handed to the Nadam optimizer
base_learning_rate = 1e-4
model = create_model(num_classes=n_classes, image_shape=(224, 224, 3))

# Sparse categorical cross-entropy is used because the labels are integer class ids
_optimizer = tf.keras.optimizers.Nadam(learning_rate=base_learning_rate)
_loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=_optimizer, loss=_loss_fn, metrics=["accuracy"])

model.summary()

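##### 🔹 Compute Class Weights
Compute per-class loss weights (inversely proportional to class frequency) from the training-set class counts. They are passed to `model.fit` through `class_weight` to counter class imbalance.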


In [None]:
# `Dataset` holds the number of training samples per class (computed earlier
# from train_dataset). Each class is weighted by the inverse of its frequency,
# scaled so that a perfectly balanced dataset yields a weight of 1.0 per class.

total = sum(Dataset)        # total number of images
class_weight = {}

for i, count in enumerate(Dataset):
    if count == 0:
        # A class without training samples cannot contribute to the loss;
        # give it weight 0 instead of failing with ZeroDivisionError.
        class_weight[i] = 0.0
    else:
        class_weight[i] = (1 / count) * (total / len(Dataset))

# Print class weights
for i, w in class_weight.items():
    print(f"Weight for class {i}: {w:.2f}")


##### 💾 Model Checkpoint & Save
Save the best model during training using **checkpoints**, and optionally save the final trained model.


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint file pattern; suggested layout: dataset/Lr/optimizer_name/model_name.
# The epoch number and validation accuracy are substituted into the file name.
model_filepath = "/content/drive/MyDrive/sample_dataset/resnet50_cbam-{epoch:02d}-{val_accuracy:.4f}.keras"

# Write a checkpoint only when the monitored validation accuracy improves
checkpoint = ModelCheckpoint(
    filepath=model_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1,
)

##### 🔹 Train the Model
Train the CNN + CBAM model using the training dataset, validate on the validation dataset, and store the training history.


In [None]:
# Adjust hyperparameters such as the number of epochs here
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=5,
    class_weight=class_weight,
    callbacks=[checkpoint],
    verbose=2,
)

##### 🔹 Test the Model
Evaluate the trained model on the test dataset.

In [None]:
# Evaluate the trained model on the test split using the loss and metrics set in compile().
model.evaluate(test_dataset , verbose = 1)

## Step 4.1: Results & Visualizations


##### 🔹 Actual vs Predicted Classes

*  Visualize the model’s predictions compared to true labels on the test dataset.
*   Collect **one example per class** from `test_dataset`.




In [None]:
# Collect the first image encountered for each class in the test split
examples = {}
for sample_image, sample_label in test_dataset.unbatch().take(1000):  # take enough to find all classes
    class_idx = int(sample_label.numpy())
    if class_idx not in examples:
        examples[class_idx] = sample_image
    if len(examples) == n_classes:
        break

# Plotting
cols = 2  # number of columns
rows = math.ceil(n_classes / cols)
plt.figure(figsize=(cols * 5, rows * 5))

shown_classes = sorted(examples.keys())
if shown_classes:
    # uint8 copies are used for both display and prediction
    example_batch = np.stack([examples[c].numpy().astype("uint8") for c in shown_classes])
    # One batched forward pass instead of one model.predict call per image
    predicted_indices = np.argmax(model.predict(example_batch), axis=1)

    for i, class_idx in enumerate(shown_classes):
        predicted = class_names[predicted_indices[i]]
        actual = class_names[class_idx]

        plt.subplot(rows, cols, i + 1)
        plt.imshow(example_batch[i])
        plt.axis("off")
        # Blue title = correct prediction, red title = wrong prediction
        color = 'blue' if predicted == actual else 'red'
        plt.title(f"Pred: {predicted}\nActual: {actual}", fontsize=12, fontweight='bold', color=color)

    # Layout only needs to be adjusted once, after all subplots exist
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9,
                        top=0.9, wspace=0.4, hspace=0.4)

plt.show()

##### 🔹 Actual vs Predicted Images

*   Display sample images from the test set with their **true labels and model predictions** for qualitative evaluation.
*   Show **three images per class** for qualitative evaluation.


In [None]:
# Function to load multiple random images per class
def load_images_per_class(folder, num_images_per_class=3):
    """Load a random sample of images from every class sub-folder of ``folder``.

    Args:
        folder: Directory whose sub-directories each hold the files of one class.
        num_images_per_class: Maximum number of files sampled per class.

    Returns:
        Tuple ``(images, labels, class_names)``: ``images`` is an array of
        64x64 RGB images scaled to [0, 1], ``labels`` holds for each image the
        index of its class in ``class_names``, and ``class_names`` is the
        sorted list of sub-directory names.
    """
    images = []
    labels = []
    class_names = []

    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)
        if not os.path.isdir(subfolder_path):
            # Stray files must not consume a class index, otherwise labels
            # no longer line up with class_names.
            continue

        class_idx = len(class_names)  # position this class will get in class_names
        class_names.append(subfolder)
        image_files = os.listdir(subfolder_path)
        selected_files = random.sample(image_files, min(num_images_per_class, len(image_files)))
        for image_file in selected_files:
            img_path = os.path.join(subfolder_path, image_file)
            img = Image.open(img_path).convert('RGB')
            img = img.resize((64, 64))
            images.append(np.array(img)/255.0)
            labels.append(class_idx)

    return np.array(images), np.array(labels), class_names

# Test folder with one sub-directory per class
test_folder = '/content/drive/MyDrive/sample_dataset/test'

# Number of images sampled from each class (change as needed)
num_images_per_class = 3
images, labels, class_names = load_images_per_class(test_folder, num_images_per_class=num_images_per_class)

# NOTE: placeholder only — the "predictions" are a copy of the true labels, so
# every title is rendered as correct. Replace with real model predictions.
predicted_labels = labels.copy()  # For demo, assume correct predictions

# One row per image, two columns (original | predicted)
total_images = len(images)
cols = 2
rows = total_images

fig, axes = plt.subplots(rows, cols, figsize=(cols*5, rows*4))

# With a single row plt.subplots returns a 1-D array; make indexing uniform
if rows == 1:
    axes = np.atleast_2d(axes)

for i in range(total_images):
    img = images[i]
    true_label = class_names[labels[i]]
    predicted_label = class_names[predicted_labels[i]]
    # Blue = prediction matches the true label, red = mismatch
    color = 'blue' if true_label == predicted_label else 'red'
    left_ax, right_ax = axes[i, 0], axes[i, 1]

    # Left column: image with its true label
    left_ax.imshow(img)
    left_ax.set_title(f'True: {true_label}', fontsize=12, fontweight='bold')
    left_ax.axis('off')

    # Right column: same image with the predicted label
    right_ax.imshow(img)
    right_ax.set_title(f'Predicted: {predicted_label}', fontsize=12, fontweight='bold', color=color)
    right_ax.axis('off')

plt.tight_layout()
plt.show()


##### 📈 Training Accuracy & Loss
Visualize the model's **training and validation accuracy and loss** over epochs to assess learning and overfitting.

In [None]:
# Accuracy curves get a leading 0.0 entry; loss curves are plotted as recorded
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(17, 12))

# Left panel: accuracy
acc_ax = plt.subplot(2, 2, 1)
acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy', fontweight='bold')

# Right panel: loss
loss_ax = plt.subplot(2, 2, 2)
loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 3.0])
loss_ax.set_title('Training and Validation Loss', fontweight='bold')
loss_ax.set_xlabel('epoch')
plt.show()

##### 🔹 Label Binarization & Classification Metrics
Binarize class labels and evaluate model performance using metrics like **accuracy, precision, recall, and F1-score**.


In [None]:
from sklearn.preprocessing import label_binarize

# Initialize empty lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test dataset; labels and predictions are taken from the
# same batch, so both lists stay aligned.
for batch_images, batch_labels in test_dataset:
    predictions = model.predict(batch_images)
    predicted_labels.extend(np.argmax(predictions, axis=1))
    true_labels.extend(batch_labels.numpy())

# Binarize both label lists against the SAME full set of class ids. Using
# np.unique() on each list separately gave matrices with different columns
# whenever the model never predicted some class.
all_class_ids = list(range(n_classes))
true_labels_bin = label_binarize(true_labels, classes=all_class_ids)
predicted_labels_bin = label_binarize(predicted_labels, classes=all_class_ids)

##### 🔹 Classification Report

Provides precision, recall, F1-score, and support for each class to summarize model performance.


In [None]:
from sklearn.metrics import classification_report

# Generate the classification report. Passing `labels` explicitly keeps the
# rows aligned with class_names even when a class is absent from both the
# true and predicted labels (otherwise target_names would not match).
report = classification_report(
    true_labels,
    predicted_labels,
    labels=list(range(len(class_names))),
    target_names=class_names,
)

# Print the classification report
print(report)

##### 📈 ROC curve
*   Plot the **ROC curve** to evaluate model performance.
*  **One-vs-Rest** for multiclass and
 **One-vs-One** for binary classification.

In [None]:
from sklearn.metrics import roc_curve, auc

# Gather ground-truth labels and predicted class probabilities batch by batch
y_true, y_pred = [], []

for batch_images, batch_labels in test_dataset:     # or valid_dataset if you want
    batch_probs = model.predict(batch_images)
    y_pred.extend(batch_probs)
    y_true.extend(batch_labels.numpy())

# Convert the accumulated Python lists into arrays
y_true, y_pred = np.array(y_true), np.array(y_pred)

print("Shape of predictions:", y_pred.shape)


In [None]:
# Binarized true labels for the per-class (one-vs-rest) ROC curves
y_true_bin = label_binarize(y_true, classes=np.arange(n_classes))

In [None]:
# Per-class false-positive rates, true-positive rates and AUC values
fpr = dict()
tpr = dict()
roc_auc = dict()

# label_binarize returns a single column for two-class problems, which made
# y_true_bin[:, 1] fail. Expand it to one indicator column per class.
y_true_onehot = y_true_bin
if n_classes == 2 and y_true_bin.shape[1] == 1:
    y_true_onehot = np.hstack([1 - y_true_bin, y_true_bin])

for i in range(n_classes):
    # One-vs-rest: class i is the positive class, column i holds its score
    fpr[i], tpr[i], _ = roc_curve(y_true_onehot[:, i], y_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])


In [None]:
import matplotlib.cm as cm

plt.figure(figsize=(8, 6))

# Automatically generate colors ('tab20' or 'tab10', resampled to n_classes).
# plt.get_cmap is used because matplotlib.cm.get_cmap is no longer available
# in recent Matplotlib releases.
colors = plt.get_cmap('tab20', n_classes)

for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], color=colors(i), lw=2,
             label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

# Diagonal = performance of a random classifier
plt.plot([0, 1], [0, 1], 'k--', lw=1)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve', fontweight='bold')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()


##### 🔹 Confusion Matrix
Visualize the **confusion matrix** to assess class-wise prediction performance.

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Render the matrix as an annotated heatmap
plt.figure(figsize=(5, 3))
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Labels')
heatmap_ax.set_ylabel('True Labels')
heatmap_ax.set_title('Confusion Matrix', fontweight='bold')
plt.show()

##### 🔹 Cohen's Kappa
Measure agreement between predicted and true labels beyond chance.

In [None]:
from sklearn.metrics import cohen_kappa_score

# Agreement between true and predicted labels, corrected for chance
kappa = cohen_kappa_score(true_labels, predicted_labels)
print(f"Cohen's Kappa: {kappa:.4f}")

##### 🔹 Matthews Correlation Coefficient (MCC)
Assess overall classification quality considering all confusion matrix terms.

In [None]:
from sklearn.metrics import matthews_corrcoef

# true_labels / predicted_labels are Python lists; `list == i` evaluates to a
# single False instead of an element-wise mask, so convert to arrays first.
_true_arr = np.asarray(true_labels)
_pred_arr = np.asarray(predicted_labels)

# One-vs-rest MCC for every class present in the true labels
mcc_values = [matthews_corrcoef(_true_arr == i, _pred_arr == i) for i in np.unique(_true_arr)]

average_mcc = np.mean(mcc_values)
print(f'Average Matthews Correlation Coefficient for Multiclass: {average_mcc:.4f}')

##### 🔹 Right vs Wrong Classifier
Visualize and analyze **correctly and incorrectly classified samples** to understand model performance.

In [None]:
# Confusion matrix of the test predictions
cm = confusion_matrix(true_labels, predicted_labels)

# Count matching and non-matching (true, predicted) pairs
_label_pairs = list(zip(true_labels, predicted_labels))
total_right = len([1 for true, pred in _label_pairs if true == pred])
total_wrong = len([1 for true, pred in _label_pairs if true != pred])
total_samples = len(true_labels)

print("Total Right Predictions:", total_right)
print("Total Wrong Predictions:", total_wrong)

# Express both counts as a percentage of all samples
right_percentage = (total_right / total_samples) * 100
wrong_percentage = (total_wrong / total_samples) * 100

# Inputs for the bar plot in the next cell
labels, percentages = ['Right', 'Wrong'], [right_percentage, wrong_percentage]

In [None]:
# Bar chart of the share of right and wrong predictions
plt.figure(figsize=(8, 5))
bars = plt.bar(labels, percentages, color=['cyan', 'blue'])

# Write each percentage just above its bar
for rect in bars:
    height = rect.get_height()
    x_center = rect.get_x() + rect.get_width()/2
    plt.text(x_center, height, f'{height:.1f}%',
             ha='center', fontweight='bold', va='bottom')

# Titles, axis label and range
plt.title('Right and Wrong Predictions', fontweight='bold')
plt.ylabel('Percentage (%)')
plt.ylim(0, 100)  # percentages span 0-100
plt.axhline(0, color='grey', linewidth=0.8, linestyle='--')  # baseline at y=0

plt.show()

##### 🔹 Sensitivity & Specificity
Evaluate each class's **sensitivity (recall)** and **specificity** based on true positives and true negatives.


In [None]:
# Compute confusion matrix (rows = true classes, columns = predicted classes)
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

# If binary classification
if n_classes == 2:
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    print("Binary Classification:")
    print(f"Sensitivity (Recall): {sensitivity:.3f}")
    print(f"Specificity: {specificity:.3f}")

# If multiclass classification
else:
    true_pos = np.diag(cm).astype(float)
    support = cm.sum(axis=1)          # TP + FN per class
    predicted_pos = cm.sum(axis=0)    # TP + FP per class

    # Sensitivity = TP / (TP + FN); classes without true samples get 0,
    # consistent with the guard in the binary branch (no division by zero).
    sensitivity = np.divide(true_pos, support,
                            out=np.zeros(n_classes, dtype=float), where=support > 0)

    # Specificity = TN / (TN + FP), treating each class as "positive" vs "rest"
    false_pos = predicted_pos - np.diag(cm)
    true_neg = cm.sum() - support - predicted_pos + np.diag(cm)
    negatives = true_neg + false_pos
    specificity = np.divide(true_neg, negatives,
                            out=np.zeros(n_classes, dtype=float), where=negatives > 0)

    print("Multiclass Classification:")
    for i in range(n_classes):
        print(f"Class {i}: Sensitivity={sensitivity[i]:.3f}, Specificity={specificity[i]:.3f}")

    print(f"\nAverage Sensitivity: {np.mean(sensitivity):.3f}")
    print(f"Average Specificity: {np.mean(specificity):.3f}")


In [None]:
# --- Plot Sensitivity and Specificity ---
_grid_style = dict(axis='y', linestyle='--', alpha=0.7)

if n_classes == 2:
    # Binary case: one bar per metric
    metrics = ['Sensitivity', 'Specificity']
    values = [sensitivity, specificity]

    plt.figure(figsize=(6, 5))
    plt.bar(metrics, values, color=['#1f77b4', '#ff7f0e'])
    plt.ylim(0, 1)
    plt.title("Binary Classification Metrics")
    plt.ylabel("Score")
    plt.grid(**_grid_style)
    plt.show()

else:
    # Multiclass case: grouped bars, one pair per class
    x = np.arange(n_classes)
    width = 0.35  # Bar width
    half_width = width/2

    plt.figure(figsize=(10, 6))
    plt.bar(x - half_width, sensitivity, width, label='Sensitivity', color='#1f77b4')
    plt.bar(x + half_width, specificity, width, label='Specificity', color='#ff7f0e')

    tick_labels = [f'Class {i}' for i in range(n_classes)]
    plt.xticks(x, tick_labels)
    plt.ylim(0, 1)
    plt.xlabel("Classes")
    plt.ylabel("Score")
    plt.title("Per-Class Sensitivity and Specificity", fontsize=12, fontweight='bold')
    plt.legend()
    plt.grid(**_grid_style)
    plt.show()


In [None]:
# Confusion matrix and the number of classes it covers
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

# Macro-averaged sensitivity: mean over classes of TP / (TP + FN), ignoring NaNs
sensitivity_per_class = np.diag(cm) / np.sum(cm, axis=1)
sensitivity = np.nanmean(sensitivity_per_class)

# Macro-averaged specificity: mean over classes of TN / (TN + FP), ignoring NaNs
specificity_list = []
for i in range(n_classes):
    other_rows = np.delete(cm, i, axis=0)          # samples whose true class is not i
    tn = np.sum(np.delete(other_rows, i, axis=1))  # ... and not predicted as i
    fp = np.sum(other_rows[:, i])                  # ... but predicted as i
    negatives = tn + fp
    specificity_list.append(tn / negatives if negatives > 0 else np.nan)
specificity = np.nanmean(specificity_list)

print(f"Overall Sensitivity (Recall): {sensitivity:.3f}")
print(f"Overall Specificity: {specificity:.3f}")

# --- Plotting ---
plt.figure(figsize=(5, 4))
bar_names = ['Sensitivity', 'Specificity']
bar_values = [sensitivity, specificity]
plt.bar(bar_names, bar_values, color=['skyblue', 'salmon'])
plt.title('Overall Sensitivity and Specificity', fontsize=12, fontweight='bold')
plt.ylim(0, 1)
plt.ylabel('Score')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.show()

##### 🔹 Jaccard Index & Dice Score
Measure overlap between predicted and true classes using **Jaccard Index (IoU)** and **Dice Score**.


In [None]:
# Confusion matrix and the number of classes it covers
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

jaccard_per_class = []
dice_per_class = []

for i in range(n_classes):
    tp = cm[i, i]
    fp = cm[:, i].sum() - tp   # predicted as class i but belonging elsewhere
    fn = cm[i, :].sum() - tp   # belonging to class i but predicted elsewhere

    # Jaccard Index = TP / (TP + FP + FN); NaN when the class never occurs
    union = tp + fp + fn
    jaccard = tp / union if union > 0 else np.nan
    jaccard_per_class.append(jaccard)

    # Dice Score = 2*TP / (2*TP + FP + FN); NaN when the class never occurs
    dice_denominator = 2*tp + fp + fn
    dice = 2*tp / dice_denominator if dice_denominator > 0 else np.nan
    dice_per_class.append(dice)

# Macro-average over classes, skipping NaN entries
jaccard_index = np.nanmean(jaccard_per_class)
dice_score = np.nanmean(dice_per_class)

print(f"Overall Jaccard Index: {jaccard_index:.3f}")
print(f"Overall Dice Score: {dice_score:.3f}")

# --- Plotting ---
plt.figure(figsize=(6, 4))
overlap_names = ['Jaccard Index', 'Dice Score']
overlap_values = [jaccard_index, dice_score]
plt.bar(overlap_names, overlap_values, color=['lightgreen', 'skyblue'])
plt.ylim(0, 1)
plt.title('Overall Jaccard Index and Dice Score', fontsize=12, fontweight='bold')
plt.ylabel('Score')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.show()

##### 🔹 Model Evaluation Metrics
Summarize the model's performance using multiple metrics: **Accuracy**, **Precision**, **Recall (Sensitivity)**, **F1 Score**, **Negative Predictive Value (NPV)**, **AUC-ROC**

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Number of classes
n_classes = len(np.unique(true_labels))

# Accuracy, F1, Precision, Recall (macro average for multiclass)
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='macro')
recall = recall_score(true_labels, predicted_labels, average='macro')
f1 = f1_score(true_labels, predicted_labels, average='macro')

# NPV is only computed for the binary case
if n_classes == 2:
    cm = confusion_matrix(true_labels, predicted_labels)
    tn, fp, fn, tp = cm.ravel()
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0
else:
    npv = np.nan  # Not defined for multiclass

# AUC-ROC is computed from the predicted probabilities (y_true / y_pred from
# the ROC cell) rather than from hard labels, and supports the multiclass
# case via one-vs-rest macro averaging. The result is stored in `auc_roc` so
# the `auc` function imported from sklearn.metrics is not overwritten.
try:
    if y_pred.shape[1] == 2:
        auc_roc = roc_auc_score(y_true, y_pred[:, 1])
    else:
        auc_roc = roc_auc_score(y_true, y_pred, multi_class='ovr', average='macro',
                                labels=list(range(y_pred.shape[1])))
except ValueError:
    # e.g. a class without any positive sample in y_true
    auc_roc = np.nan

# Store metrics in a dictionary
metrics = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'NPV': npv,
    'AUC-ROC': auc_roc
}

# Plotting
plt.figure(figsize=(12, 5))
plt.bar(metrics.keys(), metrics.values(), color=['blue', 'orange', 'green', 'red', 'purple', 'cyan'])
plt.ylabel('Score')
plt.title('Model Evaluation Metrics', fontsize=12, fontweight='bold')
plt.ylim(0, 1)
plt.axhline(y=0.5, color='grey', linestyle='--')
plt.grid(axis='y')
plt.show()

In [None]:
# Report every collected metric with four decimal places
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")

##### 🔥 Grad-CAM Visualization
Generate **Grad-CAM heatmaps** to visualize the regions of input images that the **CNN + CBAM** model focuses on for its predictions.


In [None]:
# -----------------------------
# Grad-CAM Function
# -----------------------------
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        img_array: Batch containing the input image, in the format the model expects.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose output feature map is explained.
        pred_index: Class index to explain; defaults to the top predicted class.

    Returns:
        2-D numpy array: non-negative activations divided by the map's
        maximum (all zeros if that maximum is 0).
    """
    # Model that returns both the chosen feature map and the final predictions
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel by its pooled gradient and sum over channels
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    # divide_no_nan returns 0 instead of NaN when the maximum activation is 0
    heatmap = tf.math.divide_no_nan(tf.maximum(heatmap, 0), tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# -----------------------------
# Display Heatmap Function
# -----------------------------
def display_gradcam(img_path, heatmap, alpha=0.4):
    """Show the image at ``img_path`` with ``heatmap`` superimposed.

    Args:
        img_path: Path of the image file to display.
        heatmap: 2-D heatmap (as returned by make_gradcam_heatmap).
        alpha: Weight of the heatmap when it is added to the image.
    """
    img = image.load_img(img_path, target_size=(224,224))
    img = image.img_to_array(img)

    # Guard against NaN entries before the uint8 conversion
    heatmap = np.nan_to_num(heatmap)
    heatmap = np.uint8(255 * heatmap)
    # Replicate the single channel three times to match the RGB image
    heatmap = np.expand_dims(heatmap, axis=2)
    heatmap = np.repeat(heatmap, 3, axis=2)
    heatmap = tf.image.resize(heatmap, (img.shape[0], img.shape[1])).numpy()

    # Clip before casting: sums above 255 would otherwise wrap around in uint8
    superimposed_img = np.clip(heatmap * alpha + img, 0, 255)
    superimposed_img = np.uint8(superimposed_img)

    plt.imshow(superimposed_img)
    plt.axis('off')
    plt.show()

# -----------------------------
# Loop Through Classes
# -----------------------------
def gradcam_per_class(model, data_dir, last_conv_layer_name="cbam"):
    """Display a Grad-CAM overlay for one image of every class folder.

    Args:
        model: Keras model to explain.
        data_dir: Directory containing one sub-directory per class.
        last_conv_layer_name: Name of the layer whose output is explained.
    """
    class_folders = sorted(os.listdir(data_dir))

    for class_name in class_folders:
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Take the first image in the folder (sorted for a reproducible choice);
        # skip empty class folders instead of raising IndexError
        image_files = sorted(os.listdir(class_path))
        if not image_files:
            continue
        img_name = image_files[0]
        img_path = os.path.join(class_path, img_name)

        # Load the image as raw pixel values. No extra preprocess_input call:
        # the model built in this notebook applies ResNet50 preprocessing
        # itself, and InceptionV3-style scaling on top would corrupt the input.
        img = image.load_img(img_path, target_size=(224,224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # Generate Grad-CAM
        heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)

        # Plot
        print(f"Grad-CAM for class: {class_name}, image: {img_name}")
        display_gradcam(img_path, heatmap)

# -----------------------------
# Usage
# -----------------------------
# Run Grad-CAM on one image per class folder of the test split
data_dir = "/content/drive/MyDrive/sample_dataset/test"  # folder containing subfolders of each class
gradcam_per_class(model, data_dir)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# -----------------------------
# Grad-CAM Function
# -----------------------------
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        img_array: Batch containing the input image, in the format the model expects.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose output feature map is explained.
        pred_index: Class index to explain; defaults to the top predicted class.

    Returns:
        2-D numpy array: non-negative activations divided by the map's
        maximum (all zeros if that maximum is 0).
    """
    # Model that returns both the chosen feature map and the final predictions
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel by its pooled gradient and sum over channels
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    # divide_no_nan returns 0 instead of NaN when the maximum activation is 0
    heatmap = tf.math.divide_no_nan(tf.maximum(heatmap, 0), tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# -----------------------------
# Overlay Heatmap on Image
# -----------------------------
def overlay_heatmap_on_image(img_path, heatmap, alpha=0.4):
    """Blend a jet-colored version of ``heatmap`` over the image at ``img_path``.

    Args:
        img_path: Path of the image file; it is loaded as RGB and resized to 224x224.
        heatmap: 2-D array mapped through the 'jet' colormap.
        alpha: Blend weight of the heatmap (the image gets ``1 - alpha``).

    Returns:
        PIL image containing the blended result.
    """
    # Original image as an RGB array
    base_rgb = np.array(Image.open(img_path).convert("RGB").resize((224,224)))

    # Colorize the heatmap with the jet colormap, dropping the alpha channel
    colored = matplotlib.colormaps.get_cmap('jet')(heatmap)[..., :3]
    heat_img = Image.fromarray(np.uint8(colored * 255))

    # PIL expects (width, height), i.e. the reversed array shape
    height, width = base_rgb.shape[:2]
    heat_rgb = np.array(heat_img.resize((width, height), Image.Resampling.LANCZOS))

    # Weighted blend of heatmap and image
    blended = alpha * heat_rgb + (1 - alpha) * base_rgb
    return Image.fromarray(np.uint8(blended))

# -----------------------------
# Grad-CAM for Each Class
# -----------------------------
def gradcam_per_class(model, data_dir):
    """Display a Grad-CAM overlay for one image of every class folder.

    The explained layer is the first model layer whose name contains "cbam".

    Args:
        model: Keras model to explain.
        data_dir: Directory containing one sub-directory per class.
    """
    # Automatically get CBAM layer name
    cbam_layer_name = [layer.name for layer in model.layers if "cbam" in layer.name][0]
    print("Using CBAM layer:", cbam_layer_name)

    class_folders = sorted(os.listdir(data_dir))
    for class_name in class_folders:
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Pick the first image of the class (sorted for a reproducible choice);
        # skip empty class folders instead of raising IndexError
        image_files = sorted(os.listdir(class_path))
        if not image_files:
            continue
        img_name = image_files[0]
        img_path = os.path.join(class_path, img_name)

        # Load the image as raw pixel values. No extra preprocess_input call:
        # the model built in this notebook applies ResNet50 preprocessing
        # itself, and InceptionV3-style scaling on top would corrupt the input.
        img = image.load_img(img_path, target_size=(224,224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # Compute Grad-CAM heatmap
        heatmap = make_gradcam_heatmap(img_array, model, cbam_layer_name)

        # Overlay heatmap
        superimposed_img = overlay_heatmap_on_image(img_path, heatmap)

        # Display
        plt.figure(figsize=(5,5))
        plt.title(f"Class: {class_name}, Image: {img_name}")
        plt.imshow(superimposed_img)
        plt.axis('off')
        plt.show()

# -----------------------------
# Usage
# -----------------------------
# Run the colormapped Grad-CAM overlay on one image per class folder of the test split
data_dir = "/content/drive/MyDrive/sample_dataset/test"  # folder with subfolders for each class
gradcam_per_class(model, data_dir)

##### 🔹 PR-AUC (Precision-Recall AUC)
Evaluate model performance using the **area under the Precision-Recall curve**, especially useful for imbalanced datasets.

In [None]:
from sklearn.metrics import precision_recall_curve, auc
from sklearn.preprocessing import label_binarize

# Collect labels and probabilities in ONE pass over the dataset. Iterating the
# dataset once for labels and a second time inside model.predict() pairs
# labels with the wrong predictions if the dataset reshuffles per iteration.
label_batches, prob_batches = [], []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Handle binary or multiclass
if y_pred_probs.shape[1] == 2:  # binary: score the positive class (index 1)
    precision, recall, _ = precision_recall_curve(y_true, y_pred_probs[:, 1])
    pr_auc = auc(recall, precision)
else:  # multiclass: one-vs-rest PR-AUC per class, keyed by class index
    pr_auc = {}
    y_true_bin = label_binarize(y_true, classes=range(y_pred_probs.shape[1]))
    for i in range(y_pred_probs.shape[1]):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_pred_probs[:, i])
        pr_auc[i] = auc(recall, precision)

print("PR-AUC:", pr_auc)


##### 🔹 Log Loss / Cross-Entropy Loss
Evaluate prediction confidence using **log loss** (cross-entropy) between true and predicted probabilities.

In [None]:
from sklearn.metrics import log_loss

# Gather labels and probabilities in a single pass so each label stays paired
# with its own prediction even if the dataset reshuffles between iterations.
label_batches, prob_batches = [], []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Passing `labels` keeps the probability columns aligned with class indices
# even when some class never occurs in the test split.
loss = log_loss(y_true, y_pred_probs, labels=list(range(y_pred_probs.shape[1])))
print("Log Loss / Cross-Entropy Loss:", loss)


##### 🔹 Top-k Accuracy
Measure if the **true label** is among the model's **top k predicted classes** in multiclass classification.

In [None]:
# Top-k accuracy automatically for multiclass
# NOTE: when k >= number of classes the metric is trivially 1.0.
k = 3  # you can change k
top_k_acc = tf.keras.metrics.TopKCategoricalAccuracy(k=k)

# Single pass over the dataset so labels and predictions stay aligned even if
# the dataset reshuffles between iterations.
label_batches, prob_batches = [], []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# One-hot encode the integer labels with as many columns as the model has
# outputs; this covers the binary (2-column) case as well.
y_true_cat = tf.keras.utils.to_categorical(y_true, num_classes=y_pred_probs.shape[1])

top_k_acc.update_state(y_true_cat, y_pred_probs)
print(f"Top-{k} Accuracy:", top_k_acc.result().numpy())

##### 🔹 G-Mean (Geometric Mean of Sensitivity & Specificity)
Compute the **G-Mean** to evaluate balanced classification performance.

In [None]:
from sklearn.metrics import confusion_matrix

# Collect labels and predicted classes in one pass so they stay paired even if
# the dataset reshuffles between iterations.
label_batches, pred_batches = [], []
for batch_images, batch_labels in test_dataset:
    pred_batches.append(np.argmax(model.predict(batch_images, verbose=0), axis=1))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred = np.concatenate(pred_batches, axis=0)

cm = confusion_matrix(y_true, y_pred)
if cm.shape[0] == 2:  # binary
    tn, fp, fn, tp = cm.ravel()
    # Guard the denominators exactly like the multiclass branch does.
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    g_mean = math.sqrt(sensitivity * specificity)
else:  # multiclass: compute G-mean per class (one-vs-rest) and average
    sensitivity_list = []
    specificity_list = []
    for i in range(cm.shape[0]):
        tp = cm[i, i]
        fn = cm[i, :].sum() - tp
        fp = cm[:, i].sum() - tp
        tn = cm.sum() - (tp + fn + fp)
        sensitivity_list.append(tp / (tp + fn) if (tp + fn) > 0 else 0)
        specificity_list.append(tn / (tn + fp) if (tn + fp) > 0 else 0)
    g_mean = np.mean(np.sqrt(np.array(sensitivity_list) * np.array(specificity_list)))

print("G-Mean:", g_mean)

#**<font color='red'>ResNet50  + SE (Squeeze-and-Excitation)</font>**


## Step 3.2: Model Training

##### 🔗 CNN with SE
Add **SE (Squeeze-and-Excitation)** blocks to help the model focus more on the **important channels** in the image.

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# -----------------------------
# SE Block
# -----------------------------
class SEBlock(layers.Layer):
    """Squeeze-and-Excitation block: re-weights channels of a feature map.

    Args:
        channels: Number of channels of the incoming feature map.
        ratio: Reduction ratio of the bottleneck Dense layer.
        **kwargs: Forwarded to `layers.Layer` (e.g. `name`).
    """

    def __init__(self, channels, ratio=8, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can rebuild the layer when a saved model is loaded.
        self.channels = channels
        self.ratio = ratio
        self.global_avg_pool = layers.GlobalAveragePooling2D()
        # max(1, ...) prevents a zero-width bottleneck when channels < ratio.
        self.fc1 = layers.Dense(max(1, channels // ratio), activation='relu',
                                kernel_initializer='he_normal')
        self.fc2 = layers.Dense(channels, activation='sigmoid', kernel_initializer='he_normal')

    def call(self, inputs):
        """Scale every channel of `inputs` by its learned gate in (0, 1)."""
        se = self.global_avg_pool(inputs)   # squeeze: one value per channel
        se = self.fc1(se)                   # excitation bottleneck
        se = self.fc2(se)                   # per-channel sigmoid gates
        se = tf.reshape(se, [-1, 1, 1, inputs.shape[-1]])
        return inputs * se                  # broadcast across the spatial dims

    def get_config(self):
        """Return constructor arguments so saved models can be reloaded."""
        config = super().get_config()
        config.update({"channels": self.channels, "ratio": self.ratio})
        return config



# -----------------------------
# Model with SE
# -----------------------------
def create_resnet_se(image_shape=(224,224,3), num_classes=n_classes):
    """Build a ResNet50 backbone followed by an SE block and a softmax head.

    Args:
        image_shape: Input image shape as (height, width, channels).
        num_classes: Number of output classes; the default is the value of the
            global `n_classes` at the time this function is defined.

    Returns:
        An uncompiled `tf.keras.Model` that takes raw images; ResNet
        preprocessing is applied inside the model.
    """
    base_model = tf.keras.applications.ResNet50(input_shape=image_shape,
                                                   include_top=False,
                                                   weights="imagenet")
    base_model.trainable = True
    # NOTE(review): the slice stops at 291; if the backbone has fewer layers
    # than that, this freezes the whole backbone -- confirm this is intended.
    for layer in base_model.layers[0:291]:
        layer.trainable = False

    inputs = tf.keras.Input(shape=image_shape)
    x = tf.keras.applications.resnet.preprocess_input(inputs)
    # training=False keeps BatchNorm layers in inference mode.
    x = base_model(x, training=False)   # (None, 7, 7, 2048) for 224x224 inputs

    # Insert SE here
    x = SEBlock(channels=2048, name="se_block")(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    return tf.keras.Model(inputs, outputs)


# The build cell calls `create_resnet_se`; keep the original name as an alias
# so existing `create_model(...)` calls still work.
create_model = create_resnet_se


##### 🔹 Build and Compile Model

In [None]:
# -----------------------------
# Build and Compile
# -----------------------------
# Builds the SE model and compiles it with Nadam and sparse categorical
# cross-entropy (labels are integer class indices, not one-hot vectors).
# NOTE: relies on a builder named `create_resnet_se` being defined earlier in
# the notebook.
base_learning_rate = 0.0001
model = create_resnet_se(image_shape=(224,224,3), num_classes=n_classes)

model.compile(
    optimizer=tf.keras.optimizers.Nadam(learning_rate=base_learning_rate),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"]
)

model.summary()

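##### 🔹 Compute Class Weights
Compute a weight for every class that is inversely proportional to its number of samples, so that under-represented classes contribute more to the loss when the weights are passed to `model.fit(class_weight=...)`.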


In [None]:
# `Dataset` is expected to hold the number of samples of each class, in class
# index order. It can also be derived from train_dataset with:
# class_counts = [sum(1 for _, label in train_dataset.unbatch().as_numpy_iterator() if label==i) for i in range(n_classes)]

# Total number of images over all classes.
total = sum(Dataset)

# Balanced weighting: weight_i = (1 / count_i) * (total / number_of_classes).
class_weight = {
    class_id: (1 / n_samples) * (total / len(Dataset))
    for class_id, n_samples in enumerate(Dataset)
}

# Print class weights
for class_id, weight in class_weight.items():
    print(f"Weight for class {class_id}: {weight:.2f}")


##### 🔹 Model Checkpoint & Save
Save the best model during training using **checkpoints**, and optionally save the final trained model.


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint path pattern; adapt it to *dataset/Lr/optimizer_name/model_name*.
# The epoch number and validation accuracy are filled in by Keras on save.
model_filepath = "/content/drive/MyDrive/sample_dataset/resnet_se-{epoch:02d}-{val_accuracy:.4f}.keras"

# Save only when validation accuracy improves on the best value so far.
checkpoint = ModelCheckpoint(
    filepath=model_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1,
)


##### 🔹 Train the Model
Train the ResNet50 + SE model on the training dataset, validate it on the validation dataset, and store the training history.


In [None]:
# Tune hyperparameters such as the number of epochs here.
# class_weight re-balances the loss; the checkpoint callback stores the best model.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=5,
    class_weight=class_weight,
    callbacks=[checkpoint],
    verbose=2,
)

##### 🔹 Test the Model
Evaluate the trained model on the test dataset.

In [None]:
# Report the compiled loss and metrics (accuracy) on the held-out test dataset.
model.evaluate(test_dataset , verbose = 1)

## Step 4.2: Results & Visualizations


##### 🔹 Actual vs Predicted Classes

*  Visualize the model’s predictions compared to true labels on the test dataset.
*   Collect **one example per class** from `test_dataset`.




In [None]:
# Keep the first test image encountered for every class index.
examples = {}
for images, labels in test_dataset.unbatch().take(1000):  # take enough to find all classes
    class_idx = labels.numpy()
    examples.setdefault(class_idx, images)  # never overwrites an existing entry
    if len(examples) == n_classes:
        break

# Plotting: grid with `cols` columns and as many rows as needed.
cols = 2
rows = math.ceil(n_classes / cols)
plt.figure(figsize=(cols * 5, rows * 5))

for i, class_idx in enumerate(sorted(examples.keys())):
    img = examples[class_idx].numpy().astype("uint8")
    # The model expects a batch, so add a leading batch dimension.
    predict = model.predict(tf.expand_dims(img, 0))
    actual = class_names[class_idx]
    predicted = class_names[np.argmax(predict)]

    # Blue title for a correct prediction, red for a wrong one.
    color = 'red' if predicted != actual else 'blue'

    plt.subplot(rows, cols, i + 1)
    plt.imshow(img)
    plt.axis("off")
    plt.title(f"Pred: {predicted}\nActual: {actual}", fontsize=12, fontweight='bold', color=color)

# Spacing applies to the whole figure, so it is set once after drawing.
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9,
                    top=0.9, wspace=0.4, hspace=0.4)

plt.show()

##### 🔹 Actual vs Predicted Images

*   Display sample images from the test set with their **true labels and model predictions** for qualitative evaluation.
*   Show **three images per class** for qualitative evaluation.


In [None]:
# Function to load multiple random images per class
def load_images_per_class(folder, num_images_per_class=3, image_size=(64, 64)):
    """Load a random sample of images from every class sub-folder.

    Args:
        folder: Directory containing one sub-folder per class.
        num_images_per_class: Maximum number of images sampled per class.
        image_size: (width, height) every image is resized to.

    Returns:
        A tuple `(images, labels, class_names)`: `images` is an array of RGB
        images scaled to [0, 1], `labels` holds for each image the index of
        its class in `class_names`, and `class_names` lists the sub-folder
        names in sorted order.
    """
    images = []
    labels = []
    class_names = []

    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        # Index classes by their position in class_names so stray files in
        # `folder` cannot shift labels away from their class names.
        class_idx = len(class_names)
        class_names.append(subfolder)

        image_files = os.listdir(subfolder_path)
        selected_files = random.sample(image_files, min(num_images_per_class, len(image_files)))
        for image_file in selected_files:
            img_path = os.path.join(subfolder_path, image_file)
            # The context manager closes the file handle once pixels are read.
            with Image.open(img_path) as raw:
                img = raw.convert('RGB').resize(image_size)
            images.append(np.array(img) / 255.0)
            labels.append(class_idx)

    return np.array(images), np.array(labels), class_names

# Path to your test folder
test_folder = '/content/drive/MyDrive/sample_dataset/test'

# Load images: change num_images_per_class as needed
# NOTE: this rebinds the notebook-level names `images`, `labels` and
# `class_names`, which later cells read.
num_images_per_class = 3
images, labels, class_names = load_images_per_class(test_folder, num_images_per_class=num_images_per_class)

# Example predicted labels (replace with your model predictions)
# NOTE: these are placeholders copied from the true labels; the model is not
# called here, so every title in the grid shows a "correct" prediction.
predicted_labels = labels.copy()  # For demo, assume correct predictions

# Automatically calculate subplot grid
total_images = len(images)
cols = 2  # Original + Predicted
rows = total_images  # Each image gets a row

fig, axes = plt.subplots(rows, cols, figsize=(cols*5, rows*4))

# plt.subplots returns a 1-D axes array for a single row; make it 2-D so the
# axes[i, j] indexing below works.
if rows == 1:  # Special case if only 1 image
    axes = np.expand_dims(axes, axis=0)

for i in range(total_images):
    img = images[i]
    true_label = class_names[labels[i]]
    predicted_label = class_names[predicted_labels[i]]

    # Original
    axes[i, 0].imshow(img)
    axes[i, 0].set_title(f'True: {true_label}', fontsize=12, fontweight='bold')
    axes[i, 0].axis('off')

    # Predicted (blue title when it matches the true label, red otherwise)
    axes[i, 1].imshow(img)
    color = 'blue' if true_label == predicted_label else 'red'
    axes[i, 1].set_title(f'Predicted: {predicted_label}', fontsize=12, fontweight='bold', color=color)
    axes[i, 1].axis('off')

plt.tight_layout()
plt.show()


##### 📈 Training Accuracy & Loss
Visualize the model's **training and validation accuracy and loss** over epochs to assess learning and overfitting.

In [None]:
# Accuracy curves get a leading 0.0 prepended, so their x positions are
# shifted by one relative to the loss curves, which are plotted as recorded.
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(17, 12))
# Left panel: training vs validation accuracy.
plt.subplot(2, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatic lower limit but cap the y-axis at 1.
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy', fontweight='bold')

# Right panel: training vs validation loss (y-axis fixed to [0, 3]).
plt.subplot(2, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,3.0])
plt.title('Training and Validation Loss', fontweight='bold')
plt.xlabel('epoch')
plt.show()

##### 🔹 Label Binarization & Classification Metrics
Binarize class labels and evaluate model performance using metrics like **accuracy, precision, recall, and F1-score**.


In [None]:
from sklearn.preprocessing import label_binarize

# Initialize empty lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test dataset batch by batch so every prediction stays
# paired with the label from the same batch.
for images, labels in test_dataset:
    predictions = model.predict(images)
    predicted_labels.extend(np.argmax(predictions, axis=1))
    true_labels.extend(labels.numpy())

# Binarize both label lists against the SAME full class list. Using the unique
# values of each list separately would give the two matrices different column
# meanings whenever a class is missing from the labels or the predictions.
true_labels_bin = label_binarize(true_labels, classes=np.arange(n_classes))
predicted_labels_bin = label_binarize(predicted_labels, classes=np.arange(n_classes))

##### 🔹 Classification Report
Provides precision, recall, F1-score, and support for each class to summarize model performance.

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall, F1-score and support, labelled with class names.
report = classification_report(
    true_labels,
    predicted_labels,
    target_names=class_names,
)

print(report)

##### 📈 ROC curve
*   Plot the **ROC curve** to evaluate model performance.
*  Curves are computed **One-vs-Rest**: each class in turn is treated as the positive class and all remaining classes as negative.

In [None]:
from sklearn.metrics import roc_curve, auc

# Accumulate labels and class probabilities batch by batch, so every label is
# paired with the prediction of the same sample.
y_true, y_pred = [], []

for images, labels in test_dataset:     # or valid_dataset if you want
    y_pred += list(model.predict(images))
    y_true += list(labels.numpy())

# Convert the accumulated Python lists to arrays for the ROC computation.
y_true, y_pred = np.array(y_true), np.array(y_pred)

print("Shape of predictions:", y_pred.shape)


In [None]:
# Binarize labels for multiclass ROC
y_true_bin = label_binarize(y_true, classes=list(range(n_classes)))

# label_binarize returns a single column for a two-class problem; expand it to
# one column per class so per-class indexing (y_true_bin[:, i]) works.
if n_classes == 2:
    y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

In [None]:
# Per-class false-positive rates, true-positive rates and AUC values,
# each keyed by class index.
fpr, tpr, roc_auc = {}, {}, {}

for class_id in range(n_classes):
    # One-vs-rest: column `class_id` of the binarized labels against the
    # predicted probability of that class.
    fpr[class_id], tpr[class_id], _ = roc_curve(y_true_bin[:, class_id], y_pred[:, class_id])
    roc_auc[class_id] = auc(fpr[class_id], tpr[class_id])


In [None]:
import matplotlib.cm as cm

plt.figure(figsize=(8, 6))

# Automatically generate colors: 'tab20' (or 'tab10') resampled to n_classes.
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap keeps
# the same (name, lut) signature.
colors = plt.get_cmap('tab20', n_classes)

for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], color=colors(i), lw=2,
             label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], 'k--', lw=1)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve', fontweight='bold')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()


##### 🔹 Confusion Matrix
Visualize the **confusion matrix** to assess class-wise prediction performance.

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Plot the confusion matrix as a heatmap (integer counts annotated per cell)
plt.figure(figsize=(5, 3))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix', fontweight='bold')
plt.show()

##### 🔹 Cohen's Kappa
Measure agreement between predicted and true labels beyond chance.

In [None]:
from sklearn.metrics import cohen_kappa_score

# Agreement between true and predicted labels, corrected for chance agreement.
kappa = cohen_kappa_score(true_labels, predicted_labels)
print(f"Cohen's Kappa: {kappa:.4f}")

##### 🔹 Matthews Correlation Coefficient (MCC)
Assess overall classification quality considering all confusion matrix terms.

In [None]:
from sklearn.metrics import matthews_corrcoef

# true_labels and predicted_labels are Python lists; comparing a list with an
# integer yields a single False instead of an element-wise mask, so convert
# them to arrays first.
true_label_array = np.asarray(true_labels)
predicted_label_array = np.asarray(predicted_labels)

# One-vs-rest MCC for every class present in the true labels.
mcc_values = [
    matthews_corrcoef(true_label_array == i, predicted_label_array == i)
    for i in np.unique(true_label_array)
]

average_mcc = np.mean(mcc_values)
print(f'Average Matthews Correlation Coefficient for Multiclass: {average_mcc:.4f}')

##### 🔹 Right vs Wrong Classifier
Visualize and analyze **correctly and incorrectly classified samples** to understand model performance.

In [None]:
# Count correct predictions; every remaining sample is a wrong prediction.
total_samples = len(true_labels)
total_right = sum(1 for true, pred in zip(true_labels, predicted_labels) if true == pred)
total_wrong = total_samples - total_right

# Share of wrong predictions, in percent.
wrong_prediction_percentage = (total_wrong / total_samples) * 100

print("Total Right Predictions:", total_right)
print("Total Wrong Predictions:", total_wrong)
print("Wrong Prediction Percentage: {:.2f}%".format(wrong_prediction_percentage))


In [None]:
# Bar chart comparing the number of right and wrong predictions.
categories = ['Right Predictions', 'Wrong Predictions']
values = [total_right, total_wrong]

plt.figure(figsize=(6, 4))
plt.bar(categories, values, color=['green', 'red'])
plt.title('Right and Wrong Predictions', fontweight='bold')
plt.ylabel('Count')

# Write each count just above its bar.
for position, count in enumerate(values):
    plt.text(position, count + 0.1, str(count), ha='center', fontweight='bold')

plt.show()

##### 🔹 Sensitivity & Specificity
Evaluate each class's **sensitivity (recall)** and **specificity** based on true positives and true negatives


In [None]:
# Compute confusion matrix
# NOTE: this rebinds the notebook-level `n_classes` to the number of classes
# that actually appear in the labels or predictions.
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

# If binary classification
if n_classes == 2:
    # For a 2x2 matrix ravel() yields TN, FP, FN, TP in that order.
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    print("Binary Classification:")
    print(f"Sensitivity (Recall): {sensitivity:.3f}")
    print(f"Specificity: {specificity:.3f}")

# If multiclass classification
else:
    # Sensitivity = TP / (TP + FN)
    # (diagonal over row sums; unguarded, so a class with an empty row gives NaN)
    sensitivity = np.diag(cm) / np.sum(cm, axis=1)
    # Specificity = TN / (TN + FP)
    specificity = []
    for i in range(n_classes):
        # For each class, treat it as "positive" vs "rest"
        # TN: everything outside row i and column i; FP: column i without row i.
        tn = np.sum(np.delete(np.delete(cm, i, axis=0), i, axis=1))
        fp = np.sum(np.delete(cm, i, axis=0)[:, i])
        specificity.append(tn / (tn + fp) if (tn + fp) > 0 else 0)
    specificity = np.array(specificity)

    print("Multiclass Classification:")
    for i in range(n_classes):
        print(f"Class {i}: Sensitivity={sensitivity[i]:.3f}, Specificity={specificity[i]:.3f}")

    print(f"\nAverage Sensitivity: {np.mean(sensitivity):.3f}")
    print(f"Average Specificity: {np.mean(specificity):.3f}")


In [None]:
# --- Plot Sensitivity and Specificity ---
# Reads `n_classes`, `sensitivity` and `specificity` as left by the previous
# cell: scalars in the binary case, per-class arrays in the multiclass case.
if n_classes == 2:
    metrics = ['Sensitivity', 'Specificity']
    values = [sensitivity, specificity]

    plt.figure(figsize=(6, 5))
    plt.bar(metrics, values, color=['#1f77b4', '#ff7f0e'])
    plt.ylim(0, 1)
    plt.title("Binary Classification Metrics")
    plt.ylabel("Score")
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

else:
    x = np.arange(n_classes)
    width = 0.35  # Bar width

    # Grouped bars: sensitivity left of each tick, specificity right of it.
    plt.figure(figsize=(10, 6))
    plt.bar(x - width/2, sensitivity, width, label='Sensitivity', color='#1f77b4')
    plt.bar(x + width/2, specificity, width, label='Specificity', color='#ff7f0e')

    plt.xticks(x, [f'Class {i}' for i in range(n_classes)])
    plt.ylim(0, 1)
    plt.xlabel("Classes")
    plt.ylabel("Score")
    plt.title("Per-Class Sensitivity and Specificity", fontsize=12, fontweight='bold')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()


In [None]:
# Compute confusion matrix
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

# Sensitivity (macro average): TP / (TP + FN)
# nanmean ignores classes whose per-class value is NaN.
sensitivity_per_class = np.diag(cm) / np.sum(cm, axis=1)
sensitivity = np.nanmean(sensitivity_per_class)

# Specificity (macro average): TN / (TN + FP)
specificity_list = []
for i in range(n_classes):
    # TN: all entries outside row i and column i; FP: column i without row i.
    tn = np.sum(np.delete(np.delete(cm, i, axis=0), i, axis=1))
    fp = np.sum(np.delete(cm, i, axis=0)[:, i])
    specificity_list.append(tn / (tn + fp) if (tn + fp) > 0 else np.nan)
specificity = np.nanmean(specificity_list)

print(f"Overall Sensitivity (Recall): {sensitivity:.3f}")
print(f"Overall Specificity: {specificity:.3f}")

# --- Plotting ---
plt.figure(figsize=(5, 4))
plt.bar(['Sensitivity', 'Specificity'], [sensitivity, specificity], color=['skyblue', 'salmon'])
plt.title('Overall Sensitivity and Specificity', fontsize=12, fontweight='bold')
plt.ylim(0, 1)
plt.ylabel('Score')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.show()

##### 🔹 Jaccard Index & Dice Score
Measure overlap between predicted and true classes using **Jaccard Index (IoU)** and **Dice Score**.


In [None]:
# Compute confusion matrix
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

jaccard_per_class = []
dice_per_class = []

for i in range(n_classes):
    # One-vs-rest counts for class i, read from the confusion matrix.
    tp = cm[i, i]
    fp = np.sum(cm[:, i]) - tp
    fn = np.sum(cm[i, :]) - tp

    # Jaccard Index = TP / (TP + FP + FN); NaN when the denominator is zero
    jaccard = tp / (tp + fp + fn) if (tp + fp + fn) > 0 else np.nan
    jaccard_per_class.append(jaccard)

    # Dice Score = 2*TP / (2*TP + FP + FN)
    dice = 2*tp / (2*tp + fp + fn) if (2*tp + fp + fn) > 0 else np.nan
    dice_per_class.append(dice)

# Macro-average (overall); NaN entries are ignored by nanmean
jaccard_index = np.nanmean(jaccard_per_class)
dice_score = np.nanmean(dice_per_class)

print(f"Overall Jaccard Index: {jaccard_index:.3f}")
print(f"Overall Dice Score: {dice_score:.3f}")

# --- Plotting ---
plt.figure(figsize=(6, 4))
plt.bar(['Jaccard Index', 'Dice Score'], [jaccard_index, dice_score], color=['lightgreen', 'skyblue'])
plt.ylim(0, 1)
plt.title('Overall Jaccard Index and Dice Score', fontsize=12, fontweight='bold')
plt.ylabel('Score')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.show()

##### 🔹 Model Evaluation Metrics
Summarize the model's performance using multiple metrics: **Accuracy**, **Precision**, **Recall (Sensitivity)**, **F1 Score**, **Negative Predictive Value (NPV)**, **AUC-ROC**

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Number of classes
n_classes = len(np.unique(true_labels))

# Accuracy, F1, Precision, Recall (macro average for multiclass)
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='macro')
recall = recall_score(true_labels, predicted_labels, average='macro')
f1 = f1_score(true_labels, predicted_labels, average='macro')

# For binary: compute NPV and AUC.
# The result is stored as `auc_roc`, not `auc`, so it does not overwrite the
# `auc` function imported from sklearn.metrics that other cells call.
if n_classes == 2:
    cm = confusion_matrix(true_labels, predicted_labels)
    tn, fp, fn, tp = cm.ravel()
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0
    # NOTE: computed from hard class labels, not predicted probabilities.
    auc_roc = roc_auc_score(true_labels, predicted_labels)
else:
    npv = np.nan  # Not defined for multiclass
    auc_roc = np.nan  # Not defined for multiclass

# Store metrics in a dictionary
metrics = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'NPV': npv,
    'AUC-ROC': auc_roc
}

# Plotting (NaN metrics produce no visible bar)
plt.figure(figsize=(12, 5))
plt.bar(metrics.keys(), metrics.values(), color=['blue', 'orange', 'green', 'red', 'purple', 'cyan'])
plt.ylabel('Score')
plt.title('Model Evaluation Metrics', fontsize=12, fontweight='bold')
plt.ylim(0, 1)
plt.axhline(y=0.5, color='grey', linestyle='--')
plt.grid(axis='y')
plt.show()

In [None]:
# Print metric values (four decimals) from the `metrics` dictionary built in
# the previous cell.
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

##### 🔥 Grad-CAM Visualization
Generate **Grad-CAM heatmaps** to visualize the regions of input images that the **ResNet50 + SE** model focuses on for its predictions.


In [None]:
# -----------------------------
# Grad-CAM Function
# -----------------------------
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for a single image.

    Args:
        img_array: Batched input image passed to the model.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose output is the feature
            map for the heatmap.
        pred_index: Class index to explain; defaults to the top prediction.

    Returns:
        A 2-D NumPy array with values in [0, 1]; all zeros when no location
        contributes positively to the chosen class.
    """
    # Map the model inputs to (feature map, predictions). `model.inputs` is
    # passed as-is: wrapping it in another list creates a nested input structure.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel.
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel by its pooled gradient and sum over channels.
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. Normalise by the
    # maximum AFTER the ReLU and skip the division when it is zero, so the
    # result never contains NaN or negative values.
    heatmap = tf.maximum(heatmap, 0)
    max_value = tf.math.reduce_max(heatmap)
    if max_value > 0:
        heatmap = heatmap / max_value
    return heatmap.numpy()

# -----------------------------
# Display Heatmap Function
# -----------------------------
def display_gradcam(img_path, heatmap, alpha=0.4):
    """Show the image at `img_path` with a grayscale Grad-CAM heatmap added.

    Args:
        img_path: Path of the image to display (resized to 224x224).
        heatmap: 2-D heatmap with values in [0, 1].
        alpha: Strength of the heatmap added on top of the image.
    """
    img = image.load_img(img_path, target_size=(224,224))
    img = image.img_to_array(img)

    # Scale to 0-255 and replicate to three channels so it matches the image.
    # nan_to_num guards against NaNs from a degenerate heatmap.
    heatmap = np.uint8(255 * np.nan_to_num(heatmap))
    heatmap = np.expand_dims(heatmap, axis=2)
    heatmap = np.repeat(heatmap, 3, axis=2)
    heatmap = tf.image.resize(heatmap, (img.shape[0], img.shape[1])).numpy()

    # Clip before the uint8 cast: the sum can exceed 255 and would otherwise
    # wrap around, producing dark artefacts in the brightest regions.
    superimposed_img = np.clip(heatmap * alpha + img, 0, 255)
    superimposed_img = np.uint8(superimposed_img)

    plt.imshow(superimposed_img)
    plt.axis('off')
    plt.show()

# -----------------------------
# Loop Through Classes
# -----------------------------
def gradcam_per_class(model, data_dir, last_conv_layer_name="cbam"):
    """Show a Grad-CAM heatmap for one image of every class folder.

    Args:
        model: Keras model to explain; it is fed raw 0-255 pixel values.
        data_dir: Directory containing one sub-folder per class.
        last_conv_layer_name: Name of the layer used for Grad-CAM. If no layer
            has exactly this name, the first layer whose name contains
            "cbam", "se_block" or "eca_block" is used instead.

    Raises:
        ValueError: If neither the named layer nor an attention layer exists.
    """
    # Resolve the target layer: models in this notebook name their attention
    # layer differently (e.g. "se_block"), so fall back to a name search.
    layer_names = [layer.name for layer in model.layers]
    if last_conv_layer_name not in layer_names:
        attention_tags = ("cbam", "se_block", "eca_block")
        candidates = [name for name in layer_names
                      if any(tag in name for tag in attention_tags)]
        if not candidates:
            raise ValueError(
                f"Layer '{last_conv_layer_name}' not found and no attention "
                f"layer matching {attention_tags} exists in the model."
            )
        last_conv_layer_name = candidates[0]
        print("Using attention layer:", last_conv_layer_name)

    class_folders = sorted(os.listdir(data_dir))

    for class_name in class_folders:
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Take first image in folder (sorted, so the choice is reproducible)
        image_names = sorted(os.listdir(class_path))
        if not image_names:
            print(f"Skipping empty class folder: {class_name}")
            continue
        img_name = image_names[0]
        img_path = os.path.join(class_path, img_name)

        # Load image as a raw 0-255 batch. No preprocess_input here: the model
        # built in this section applies ResNet preprocessing internally, so
        # scaling the image first would preprocess it twice.
        img = image.load_img(img_path, target_size=(224,224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # Generate Grad-CAM
        heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)

        # Plot
        print(f"Grad-CAM for class: {class_name}, image: {img_name}")
        display_gradcam(img_path, heatmap)

# -----------------------------
# Usage
# -----------------------------
# Run Grad-CAM for one image per class folder of the test split, using the
# `model` currently bound in the notebook.
data_dir = "/content/drive/MyDrive/sample_dataset/test"  # folder containing subfolders of each class
gradcam_per_class(model, data_dir)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# -----------------------------
# Grad-CAM Function
# -----------------------------
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for a single image.

    Args:
        img_array: Batched input image passed to the model.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose output is the feature
            map for the heatmap.
        pred_index: Class index to explain; defaults to the top prediction.

    Returns:
        A 2-D NumPy array with values in [0, 1]; all zeros when no location
        contributes positively to the chosen class.
    """
    # `model.inputs` is passed as-is: wrapping it in another list creates a
    # nested input structure.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel.
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel by its pooled gradient and sum over channels.
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # ReLU first, then normalise by the post-ReLU maximum; skip the division
    # when that maximum is zero so the result never contains NaN.
    heatmap = tf.maximum(heatmap, 0)
    max_value = tf.math.reduce_max(heatmap)
    if max_value > 0:
        heatmap = heatmap / max_value
    return heatmap.numpy()

# -----------------------------
# Overlay Heatmap on Image
# -----------------------------
def overlay_heatmap_on_image(img_path, heatmap, alpha=0.4):
    """Blend a jet-coloured heatmap with the image stored at `img_path`.

    The image is loaded as RGB and resized to 224x224; the heatmap is coloured
    with the 'jet' colormap, resized to the image size and alpha-blended.
    Returns the blended picture as a PIL image.
    """
    # Original picture as an RGB array of fixed size.
    base_rgb = np.array(Image.open(img_path).convert("RGB").resize((224,224)))

    # Colormap lookup yields RGBA floats in [0, 1]; keep RGB and go to 0-255.
    rgba = matplotlib.colormaps.get_cmap('jet')(heatmap)
    colored = np.uint8(rgba[..., :3] * 255)

    # PIL expects (width, height), i.e. the reversed array shape.
    target_size = base_rgb.shape[:2][::-1]
    resized = np.array(
        Image.fromarray(colored).resize(target_size, Image.Resampling.LANCZOS)
    )

    # Weighted blend of heatmap and picture.
    blended = np.uint8(alpha * resized + (1 - alpha) * base_rgb)
    return Image.fromarray(blended)

# -----------------------------
# Grad-CAM for Each Class
# -----------------------------
def gradcam_per_class(model, data_dir, attention_tags=("cbam", "se_block", "eca_block")):
    """Display a colour Grad-CAM overlay for one image of every class folder.

    Args:
        model: Keras model to explain; it is fed raw 0-255 pixel values.
        data_dir: Directory containing one sub-folder per class.
        attention_tags: Name fragments used to locate the attention layer whose
            output feeds Grad-CAM; the first matching layer is used.

    Raises:
        ValueError: If no layer name contains any of `attention_tags`.
    """
    # Automatically get the attention layer name. The model built in this
    # section names its layer "se_block", so searching for "cbam" alone fails.
    matches = [layer.name for layer in model.layers
               if any(tag in layer.name for tag in attention_tags)]
    if not matches:
        raise ValueError(
            f"No layer matching {tuple(attention_tags)} was found in the model."
        )
    attention_layer_name = matches[0]
    print("Using attention layer:", attention_layer_name)

    class_folders = sorted(os.listdir(data_dir))
    for class_name in class_folders:
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Pick first image of class (sorted, so the choice is reproducible)
        image_names = sorted(os.listdir(class_path))
        if not image_names:
            print(f"Skipping empty class folder: {class_name}")
            continue
        img_name = image_names[0]
        img_path = os.path.join(class_path, img_name)

        # Load image as a raw 0-255 batch. No preprocess_input here: the model
        # built in this section applies ResNet preprocessing internally, so
        # scaling the image first would preprocess it twice.
        img = image.load_img(img_path, target_size=(224,224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # Compute Grad-CAM heatmap
        heatmap = make_gradcam_heatmap(img_array, model, attention_layer_name)

        # Overlay heatmap
        superimposed_img = overlay_heatmap_on_image(img_path, heatmap)

        # Display
        plt.figure(figsize=(5,5))
        plt.title(f"Class: {class_name}, Image: {img_name}")
        plt.imshow(superimposed_img)
        plt.axis('off')
        plt.show()

# -----------------------------
# Usage
# -----------------------------
# Run the colour Grad-CAM overlay for one image per class folder of the test
# split, using the `model` currently bound in the notebook.
data_dir = "/content/drive/MyDrive/sample_dataset/test"  # folder with subfolders for each class
gradcam_per_class(model, data_dir)

##### 🔹 PR-AUC (Precision-Recall AUC)
Evaluate model performance using the **area under the Precision-Recall curve**, especially useful for imbalanced datasets.

In [None]:
from sklearn.metrics import precision_recall_curve, auc
from sklearn.preprocessing import label_binarize

# Collect labels and probabilities in ONE pass over the dataset. Iterating the
# dataset once for labels and a second time inside model.predict() pairs
# labels with the wrong predictions if the dataset reshuffles per iteration.
label_batches, prob_batches = [], []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Handle binary or multiclass
if y_pred_probs.shape[1] == 2:  # binary: score the positive class (index 1)
    precision, recall, _ = precision_recall_curve(y_true, y_pred_probs[:, 1])
    pr_auc = auc(recall, precision)
else:  # multiclass: one-vs-rest PR-AUC per class, keyed by class index
    pr_auc = {}
    y_true_bin = label_binarize(y_true, classes=range(y_pred_probs.shape[1]))
    for i in range(y_pred_probs.shape[1]):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_pred_probs[:, i])
        pr_auc[i] = auc(recall, precision)

print("PR-AUC:", pr_auc)


##### 🔹 Log Loss / Cross-Entropy Loss
Evaluate prediction confidence using **log loss** (cross-entropy) between true and predicted probabilities.

In [None]:
from sklearn.metrics import log_loss

# Gather labels and probabilities in a single pass so each label stays paired
# with its own prediction even if the dataset reshuffles between iterations.
label_batches, prob_batches = [], []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Passing `labels` keeps the probability columns aligned with class indices
# even when some class never occurs in the test split.
loss = log_loss(y_true, y_pred_probs, labels=list(range(y_pred_probs.shape[1])))
print("Log Loss / Cross-Entropy Loss:", loss)


##### 🔹 Top-k Accuracy
Measure if the **true label** is among the model's **top k predicted classes** in multiclass classification.

In [None]:
# Top-k accuracy automatically for multiclass
# NOTE: when k >= number of classes the metric is trivially 1.0.
k = 3  # you can change k
top_k_acc = tf.keras.metrics.TopKCategoricalAccuracy(k=k)

# Single pass over the dataset so labels and predictions stay aligned even if
# the dataset reshuffles between iterations.
label_batches, prob_batches = [], []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# One-hot encode the integer labels with as many columns as the model has
# outputs; this covers the binary (2-column) case as well.
y_true_cat = tf.keras.utils.to_categorical(y_true, num_classes=y_pred_probs.shape[1])

top_k_acc.update_state(y_true_cat, y_pred_probs)
print(f"Top-{k} Accuracy:", top_k_acc.result().numpy())

##### 🔹 G-Mean (Geometric Mean of Sensitivity & Specificity)
Compute the **G-Mean** to evaluate balanced classification performance.

In [None]:
# Collect labels and predicted classes in one pass so they stay paired even if
# the dataset reshuffles between iterations.
label_batches, pred_batches = [], []
for batch_images, batch_labels in test_dataset:
    pred_batches.append(np.argmax(model.predict(batch_images, verbose=0), axis=1))
    label_batches.append(batch_labels.numpy())
y_true = np.concatenate(label_batches, axis=0)
y_pred = np.concatenate(pred_batches, axis=0)

cm = confusion_matrix(y_true, y_pred)
if cm.shape[0] == 2:  # binary
    tn, fp, fn, tp = cm.ravel()
    # Guard the denominators exactly like the multiclass branch does.
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    g_mean = math.sqrt(sensitivity * specificity)
else:  # multiclass: compute G-mean per class (one-vs-rest) and average
    sensitivity_list = []
    specificity_list = []
    for i in range(cm.shape[0]):
        tp = cm[i, i]
        fn = cm[i, :].sum() - tp
        fp = cm[:, i].sum() - tp
        tn = cm.sum() - (tp + fn + fp)
        sensitivity_list.append(tp / (tp + fn) if (tp + fn) > 0 else 0)
        specificity_list.append(tn / (tn + fp) if (tn + fp) > 0 else 0)
    g_mean = np.mean(np.sqrt(np.array(sensitivity_list) * np.array(specificity_list)))

print("G-Mean:", g_mean)

#**<font color='red'>ResNet50 + ECA (Efficient Channel Attention)</font>**


## Step 3.3: Model Training

##### 🔗 CNN with ECA
Add **ECA (Efficient Channel Attention)** modules to let the model **learn which channels are most useful** using only a lightweight 1-D convolution over the pooled channel descriptor.

In [None]:
# -----------------------------
# ECA Block
# -----------------------------
class ECABlock(layers.Layer):
    """Efficient Channel Attention block for (batch, H, W, C) feature maps.

    The input is globally average-pooled to one value per channel, a 1D
    convolution is run across the channel axis, and the sigmoid of the result
    is used to rescale every channel of the input.

    Args:
        channels: Number of channels C of the incoming feature map; used only
            to choose the 1D kernel size.
        gamma: Divisor in the adaptive kernel-size formula.
        b: Offset in the adaptive kernel-size formula.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, channels, gamma=2, b=1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so the layer can be serialized.
        self.channels = int(channels)
        self.gamma = gamma
        self.b = b

        # Adaptive kernel size: t = |log2(C) + b| / gamma, bumped to the next
        # odd number. math.log2 is exact for power-of-two channel counts,
        # whereas a float32 log ratio can land just below the integer and
        # truncate to a smaller kernel.
        t = int(abs(math.log2(self.channels) + b) / gamma)
        k = t if t % 2 else t + 1

        self.avg_pool = layers.GlobalAveragePooling2D()
        self.conv1d = layers.Conv1D(1, kernel_size=k, padding="same", use_bias=False)
        self.sigmoid = layers.Activation("sigmoid")

    def call(self, inputs):
        # inputs: (batch, H, W, C)
        y = self.avg_pool(inputs)             # (batch, C)
        y = tf.expand_dims(y, axis=-1)        # (batch, C, 1): channels become the conv "sequence"
        y = self.conv1d(y)                    # (batch, C, 1)
        y = self.sigmoid(y)                   # per-channel gate in (0, 1)
        y = tf.reshape(y, [-1, 1, 1, inputs.shape[-1]])  # (batch, 1, 1, C)
        return inputs * y                     # broadcast across H, W

    def get_config(self):
        """Return the constructor arguments so saved models can rebuild the layer."""
        config = super().get_config()
        config.update({"channels": self.channels, "gamma": self.gamma, "b": self.b})
        return config


# -----------------------------
# Model with ECA
# -----------------------------
def create_resnet_eca(image_shape=(224,224,3), num_classes=n_classes):
    """Build a ResNet50 backbone followed by an ECA block and a softmax head.

    Args:
        image_shape: Input image shape as (height, width, channels).
        num_classes: Number of output classes. The default is the value that
            the global ``n_classes`` had when this function was defined.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping raw images to class
        probabilities. ResNet preprocessing is applied inside the model.
    """
    base_model = tf.keras.applications.ResNet50(input_shape=image_shape,
                                                include_top=False,
                                                weights="imagenet")
    base_model.trainable = True
    # NOTE(review): ResNet50 without its top appears to have fewer than 291
    # layers, in which case this slice freezes the whole backbone - confirm
    # whether partial fine-tuning was intended.
    for layer in base_model.layers[0:291]:
        layer.trainable = False

    inputs = tf.keras.Input(shape=image_shape)
    x = tf.keras.applications.resnet.preprocess_input(inputs)
    x = base_model(x, training=False)          # (None, H, W, C)

    # Size the ECA block from the backbone's actual output channels instead of
    # a hard-coded 2048, so the kernel-size rule always sees the real width.
    x = ECABlock(channels=x.shape[-1], name="eca_block")(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    return tf.keras.Model(inputs, outputs)

##### 🔹 Build and Compile Model

In [None]:
# -----------------------------
# Build and Compile
# -----------------------------
# Hyper-parameters for this run
n_classes = 3  # change this according to your dataset
base_learning_rate = 0.0001

# ResNet50 + ECA network for 224x224 RGB inputs
model = create_resnet_eca(num_classes=n_classes, image_shape=(224,224,3))

# Integer labels -> sparse categorical cross-entropy; accuracy is tracked as the metric
nadam = tf.keras.optimizers.Nadam(learning_rate=base_learning_rate)
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=nadam, loss=sparse_ce, metrics=["accuracy"])

model.summary()

##### 🔹 Compute Class Weights
Compute a weight for each class from its sample count so that under-represented classes contribute more to the loss when training the ResNet50 + ECA model.


In [None]:
# `Dataset` is expected to hold the number of samples of each class, in class order.
# If it is not available it can be computed from train_dataset using:
# class_counts = [sum(1 for _, label in train_dataset.unbatch().as_numpy_iterator() if label==i) for i in range(n_classes)]

n_groups = len(Dataset)     # number of classes
total = sum(Dataset)        # total number of images

# Inverse-frequency weighting: weight_i = (1 / count_i) * (total / number_of_classes)
class_weight = {
    idx: (1 / n_samples) * (total / n_groups)
    for idx, n_samples in enumerate(Dataset)
}

# Print class weights
for idx, weight in class_weight.items():
    print(f"Weight for class {idx}: {weight:.2f}")


##### 🔹 Model Checkpoint & Save
Save the best model during training using **checkpoints**, and optionally save the final trained model.


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint file pattern; suggested layout is *dataset/Lr/optimizer_name/model_name*.
# The epoch number and validation accuracy are filled into the file name.
model_filepath="/content/drive/MyDrive/sample_dataset/resnet50_eca-{epoch:02d}-{val_accuracy:.4f}.keras"

# Save only when validation accuracy improves on the best value seen so far
checkpoint_options = dict(
    filepath=model_filepath,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)
checkpoint = ModelCheckpoint(**checkpoint_options)


##### 🔹 Train the Model
Train the ResNet50 + ECA model on the training dataset, validate on the validation dataset, and store the training history.


In [None]:
## Hyperparameters such as the number of epochs can be changed here
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=5,
    class_weight=class_weight,   # per-class loss weights
    callbacks=[checkpoint],      # saves the best model by val_accuracy
    verbose=2,
)

##### 🔹 Test the Model
Evaluate the trained model on the test dataset.

In [None]:
# Evaluate on the test dataset; returns the compiled loss followed by the accuracy metric
model.evaluate(test_dataset , verbose = 1)

## Step 4.3: Results & Visualizations


##### 🔹 Actual vs Predicted Classes

*  Visualize the model’s predictions compared to true labels on the test dataset.
*   Collect **one example per class** from `test_dataset`.




In [None]:
# Collect the first image seen for each class label
examples = {}
for image_tensor, label_tensor in test_dataset.unbatch().take(1000):  # take enough to find all classes
    class_idx = int(label_tensor.numpy())  # plain int keys for the dict and for indexing class_names
    if class_idx not in examples:
        examples[class_idx] = image_tensor
    if len(examples) == n_classes:
        break

# Plotting
cols = 2  # number of columns
rows = math.ceil(n_classes / cols)
plt.figure(figsize=(cols * 5, rows * 5))
# The spacing is the same for every subplot, so set it once instead of per image
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9,
                    top=0.9, wspace=0.4, hspace=0.4)

for i, class_idx in enumerate(sorted(examples.keys())):
    img = examples[class_idx].numpy().astype("uint8")
    img_exp = tf.expand_dims(img, 0)  # expand batch dim
    predict = model.predict(img_exp, verbose=0)  # silence the per-image progress bar
    predicted = class_names[np.argmax(predict)]
    actual = class_names[class_idx]

    plt.subplot(rows, cols, i + 1)
    plt.imshow(img)
    plt.axis("off")
    # Blue title for a correct prediction, red for a wrong one
    color = 'blue' if predicted == actual else 'red'
    plt.title(f"Pred: {predicted}\nActual: {actual}", fontsize=12, fontweight='bold', color=color)

plt.show()

##### 🔹 Actual vs Predicted Images

*   Display sample images from the test set with their **true labels and model predictions** for qualitative evaluation.
*   Show **three images per class** for qualitative evaluation.


In [None]:
# Function to load multiple random images per class
def load_images_per_class(folder, num_images_per_class=3):
    """Load a random sample of images from every class subfolder of *folder*.

    Each subdirectory of *folder* (in sorted order) is treated as one class.
    Non-directory entries are ignored and do not consume a class index, so
    every label is a valid index into the returned class-name list.

    Args:
        folder: Directory containing one subfolder per class.
        num_images_per_class: Maximum number of images sampled per class.

    Returns:
        A tuple ``(images, labels, class_names)``: an array of 64x64 RGB
        images scaled to [0, 1], an array of integer labels, and the list of
        class folder names.
    """
    images = []
    labels = []
    class_names = []

    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue  # stray files must not shift the class numbering

        # Index by position in class_names, not by position in the directory listing
        class_idx = len(class_names)
        class_names.append(subfolder)

        image_files = os.listdir(subfolder_path)
        selected_files = random.sample(image_files, min(num_images_per_class, len(image_files)))
        for image_file in selected_files:
            img_path = os.path.join(subfolder_path, image_file)
            # Close the file handle as soon as the pixels have been read
            with Image.open(img_path) as handle:
                img = handle.convert('RGB').resize((64, 64))
            images.append(np.array(img)/255.0)
            labels.append(class_idx)

    return np.array(images), np.array(labels), class_names

# Location of the test images (one subfolder per class)
test_folder = '/content/drive/MyDrive/sample_dataset/test'

# How many images to sample from each class: change as needed
num_images_per_class = 3
images, labels, class_names = load_images_per_class(test_folder, num_images_per_class=num_images_per_class)

# Placeholder predictions (replace with your model predictions):
# for the demo every prediction is assumed to be correct
predicted_labels = labels.copy()

# One row per image, two panels per row (true label | predicted label)
total_images = len(images)
cols = 2
rows = total_images

fig, axes = plt.subplots(rows, cols, figsize=(cols*5, rows*4))

# With a single row plt.subplots returns a 1-D array; restore the row axis
if rows == 1:
    axes = np.expand_dims(axes, axis=0)

for row_axes, img, true_idx, pred_idx in zip(axes, images, labels, predicted_labels):
    true_label = class_names[true_idx]
    predicted_label = class_names[pred_idx]
    left_ax, right_ax = row_axes[0], row_axes[1]

    # Left panel: ground truth
    left_ax.imshow(img)
    left_ax.set_title(f'True: {true_label}', fontsize=12, fontweight='bold')
    left_ax.axis('off')

    # Right panel: prediction, coloured by correctness
    right_ax.imshow(img)
    if true_label == predicted_label:
        color = 'blue'
    else:
        color = 'red'
    right_ax.set_title(f'Predicted: {predicted_label}', fontsize=12, fontweight='bold', color=color)
    right_ax.axis('off')

plt.tight_layout()
plt.show()


##### 📈 Training Accuracy & Loss
Visualize the model's **training and validation accuracy and loss** over epochs to assess learning and overfitting.

In [None]:
# Accuracy curves are prefixed with 0.0, so they contain one more point than
# the loss curves and their x positions are shifted by one.
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']

# Loss curves are plotted exactly as recorded by model.fit
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(17, 12))
# Left panel: training vs validation accuracy
plt.subplot(2, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatically chosen lower limit but cap the axis at 1
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy', fontweight='bold')

# Right panel: training vs validation loss
plt.subplot(2, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
# Fixed y-range; loss values above 3.0 are cut off
plt.ylim([0,3.0])
plt.title('Training and Validation Loss', fontweight='bold')
plt.xlabel('epoch')
plt.show()

##### 🔹 Label Binarization & Classification Metrics
Binarize class labels and evaluate model performance using metrics like **accuracy, precision, recall, and F1-score**.


In [None]:
from sklearn.preprocessing import label_binarize

# Initialize empty lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test dataset and make predictions batch by batch,
# so each label stays paired with its own prediction
for images, labels in test_dataset:
    predictions = model.predict(images)
    predicted_labels.extend(np.argmax(predictions, axis=1))
    true_labels.extend(labels.numpy())

# Binarize both label lists against the SAME class set (one class per model
# output). Using each list's own unique values would give matrices with
# different columns whenever the model never predicts one of the classes.
all_classes = list(range(model.output_shape[-1]))
true_labels_bin = label_binarize(true_labels, classes=all_classes)
predicted_labels_bin = label_binarize(predicted_labels, classes=all_classes)

##### 🔹 Classification Report
Provides precision, recall, F1-score, and support for each class to summarize model performance.

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 / support table, labelled with the class names
report = classification_report(
    y_true=true_labels,
    y_pred=predicted_labels,
    target_names=class_names,
)

# Show the table
print(report)

##### 📈 ROC curve
*   Plot the **ROC curve** to evaluate model performance.
*  **One-vs-Rest** for multiclass and
 **One-vs-One** for binary classification.

In [None]:
from sklearn.metrics import roc_curve, auc

# Accumulate the true labels and the predicted probability rows batch by batch
y_true, y_pred = [], []

for images, labels in test_dataset:     # or valid_dataset if you want
    y_true.extend(labels.numpy())
    y_pred.extend(model.predict(images))

# Convert the accumulated lists into arrays for sklearn
y_true, y_pred = np.array(y_true), np.array(y_pred)

print("Shape of predictions:", y_pred.shape)


In [None]:
from sklearn.preprocessing import label_binarize

# Binarize labels for the per-class ROC curves (one column per class)
y_true_bin = label_binarize(y_true, classes=list(range(n_classes)))

# For two classes label_binarize returns a single column (the positive class).
# Expand it to two columns so that indexing one column per class also works
# in the binary case.
if y_true_bin.shape[1] == 1:
    y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

In [None]:
# Per-class ROC points and AUC values, keyed by class index
fpr, tpr, roc_auc = {}, {}, {}

for class_id in range(n_classes):
    # One-vs-rest: column `class_id` of the binarized labels against that class's score
    class_fpr, class_tpr, _ = roc_curve(y_true_bin[:, class_id], y_pred[:, class_id])
    fpr[class_id], tpr[class_id] = class_fpr, class_tpr
    roc_auc[class_id] = auc(class_fpr, class_tpr)


In [None]:
import matplotlib.cm as cm

plt.figure(figsize=(8, 6))

# One distinct colour per class. pyplot.get_cmap is used here because
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
colors = plt.get_cmap('tab20', n_classes)  # 'tab20' or 'tab10', n_classes colors

# One ROC curve per class, labelled with its AUC
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], color=colors(i), lw=2,
             label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

# Diagonal = performance of a random classifier
plt.plot([0, 1], [0, 1], 'k--', lw=1)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve', fontweight='bold')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()


##### 🔹 Confusion Matrix
Visualize the **confusion matrix** to assess class-wise prediction performance.

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Plot the confusion matrix as a heatmap with integer counts in each cell
plt.figure(figsize=(5, 3))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix', fontweight='bold')
plt.show()

##### 🔹 Cohen's Kappa
Measure agreement between predicted and true labels beyond chance.

In [None]:
from sklearn.metrics import cohen_kappa_score

# Agreement between true and predicted labels, corrected for chance
kappa = cohen_kappa_score(true_labels, predicted_labels)
print(f"Cohen's Kappa: {kappa:.4f}")

##### 🔹 Matthews Correlation Coefficient (MCC)
Assess overall classification quality considering all confusion matrix terms.

In [None]:
from sklearn.metrics import matthews_corrcoef

# true_labels / predicted_labels are Python lists; comparing a list with an
# integer yields a single bool instead of an element-wise mask, so convert
# them to arrays first.
true_arr = np.asarray(true_labels)
pred_arr = np.asarray(predicted_labels)

# One-vs-rest MCC for every class present in the true labels
mcc_values = [matthews_corrcoef(true_arr == i, pred_arr == i) for i in np.unique(true_arr)]

average_mcc = np.mean(mcc_values)
print(f'Average Matthews Correlation Coefficient for Multiclass: {average_mcc:.4f}')

##### 🔹 Right vs Wrong Classifier
Visualize and analyze **correctly and incorrectly classified samples** to understand model performance.

In [None]:
# Count matching and non-matching (true, predicted) pairs
total_samples = len(true_labels)
total_right = sum(1 for pair in zip(true_labels, predicted_labels) if pair[0] == pair[1])
total_wrong = sum(1 for pair in zip(true_labels, predicted_labels) if pair[0] != pair[1])

# Share of wrong predictions, in percent
wrong_prediction_percentage = (total_wrong / total_samples) * 100

for caption, amount in (("Total Right Predictions:", total_right),
                        ("Total Wrong Predictions:", total_wrong)):
    print(caption, amount)
print("Wrong Prediction Percentage: {:.2f}%".format(wrong_prediction_percentage))


In [None]:
# Prepare data for plotting: one bar for correct and one for incorrect predictions
categories = ['Right Predictions', 'Wrong Predictions']
values = [total_right, total_wrong]

# Create a bar plot
plt.figure(figsize=(6, 4))
plt.bar(categories, values, color=['green', 'red'])
plt.ylabel('Count')
plt.title('Right and Wrong Predictions', fontweight='bold')

# Show counts on top of the bars (text placed slightly above each bar's height)
for i, v in enumerate(values):
    plt.text(i, v + 0.1, str(v), ha='center', fontweight='bold')

plt.show()

##### 🔹 Sensitivity & Specificity
Evaluate each class's **sensitivity (recall)** and **specificity** based on true positives and true negatives


In [None]:
# Confusion matrix: rows = true classes, columns = predicted classes
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

if n_classes != 2:
    # ---- Multiclass: one-vs-rest metrics per class ----
    # Sensitivity = TP / (TP + FN): diagonal divided by the row totals
    sensitivity = np.diag(cm) / np.sum(cm, axis=1)

    # Specificity = TN / (TN + FP)
    specificity = []
    for i in range(n_classes):
        # Rows of every other true class; class i is the "positive" class
        other_rows = np.delete(cm, i, axis=0)
        tn = np.sum(np.delete(other_rows, i, axis=1))
        fp = np.sum(other_rows[:, i])
        if (tn + fp) > 0:
            specificity.append(tn / (tn + fp))
        else:
            specificity.append(0)
    specificity = np.array(specificity)

    print("Multiclass Classification:")
    for i in range(n_classes):
        print(f"Class {i}: Sensitivity={sensitivity[i]:.3f}, Specificity={specificity[i]:.3f}")

    print(f"\nAverage Sensitivity: {np.mean(sensitivity):.3f}")
    print(f"Average Specificity: {np.mean(specificity):.3f}")
else:
    # ---- Binary: read the four cells directly ----
    tn, fp, fn, tp = cm.ravel()
    positives = tp + fn
    negatives = tn + fp
    sensitivity = tp / positives if positives > 0 else 0
    specificity = tn / negatives if negatives > 0 else 0

    print("Binary Classification:")
    print(f"Sensitivity (Recall): {sensitivity:.3f}")
    print(f"Specificity: {specificity:.3f}")


In [None]:
# --- Plot Sensitivity and Specificity ---
# Uses `n_classes`, `sensitivity` and `specificity` from the previous cell.
if n_classes == 2:
    # Binary case: two scalar scores, one bar each
    metrics = ['Sensitivity', 'Specificity']
    values = [sensitivity, specificity]

    plt.figure(figsize=(6, 5))
    plt.bar(metrics, values, color=['#1f77b4', '#ff7f0e'])
    plt.ylim(0, 1)
    plt.title("Binary Classification Metrics")
    plt.ylabel("Score")
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

else:
    # Multiclass case: grouped bars, one sensitivity/specificity pair per class
    x = np.arange(n_classes)
    width = 0.35  # Bar width

    plt.figure(figsize=(10, 6))
    # Shift the two series half a bar width left/right so they sit side by side
    plt.bar(x - width/2, sensitivity, width, label='Sensitivity', color='#1f77b4')
    plt.bar(x + width/2, specificity, width, label='Specificity', color='#ff7f0e')

    plt.xticks(x, [f'Class {i}' for i in range(n_classes)])
    plt.ylim(0, 1)
    plt.xlabel("Classes")
    plt.ylabel("Score")
    plt.title("Per-Class Sensitivity and Specificity", fontsize=12, fontweight='bold')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()


In [None]:
# Compute confusion matrix (rows = true classes, columns = predicted classes)
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

# Sensitivity (macro average): TP / (TP + FN)
# Diagonal divided by row totals; nanmean skips classes whose ratio is NaN
sensitivity_per_class = np.diag(cm) / np.sum(cm, axis=1)
sensitivity = np.nanmean(sensitivity_per_class)

# Specificity (macro average): TN / (TN + FP)
specificity_list = []
for i in range(n_classes):
    # TN: everything outside row i and column i; FP: column i without row i
    tn = np.sum(np.delete(np.delete(cm, i, axis=0), i, axis=1))
    fp = np.sum(np.delete(cm, i, axis=0)[:, i])
    # NaN (ignored by nanmean) when the class has no negatives at all
    specificity_list.append(tn / (tn + fp) if (tn + fp) > 0 else np.nan)
specificity = np.nanmean(specificity_list)

print(f"Overall Sensitivity (Recall): {sensitivity:.3f}")
print(f"Overall Specificity: {specificity:.3f}")

# --- Plotting ---
# Two bars: macro-averaged sensitivity and specificity
plt.figure(figsize=(5, 4))
plt.bar(['Sensitivity', 'Specificity'], [sensitivity, specificity], color=['skyblue', 'salmon'])
plt.title('Overall Sensitivity and Specificity', fontsize=12, fontweight='bold')
plt.ylim(0, 1)
plt.ylabel('Score')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.show()

##### 🔹 Jaccard Index & Dice Score
Measure overlap between predicted and true classes using **Jaccard Index (IoU)** and **Dice Score**.


In [None]:
# Confusion matrix: rows = true classes, columns = predicted classes
cm = confusion_matrix(true_labels, predicted_labels)
n_classes = cm.shape[0]

jaccard_per_class = []
dice_per_class = []

for i in range(n_classes):
    tp = cm[i, i]
    fp = np.sum(cm[:, i]) - tp   # predicted as class i but belonging elsewhere
    fn = np.sum(cm[i, :]) - tp   # class-i samples predicted as something else

    # Jaccard Index = TP / (TP + FP + FN); NaN when the class never occurs
    jaccard_denominator = tp + fp + fn
    if jaccard_denominator > 0:
        jaccard = tp / jaccard_denominator
    else:
        jaccard = np.nan
    jaccard_per_class.append(jaccard)

    # Dice Score = 2*TP / (2*TP + FP + FN); NaN when the class never occurs
    dice_denominator = 2*tp + fp + fn
    if dice_denominator > 0:
        dice = 2*tp / dice_denominator
    else:
        dice = np.nan
    dice_per_class.append(dice)

# Macro-average (overall), ignoring NaN entries
jaccard_index = np.nanmean(jaccard_per_class)
dice_score = np.nanmean(dice_per_class)

print(f"Overall Jaccard Index: {jaccard_index:.3f}")
print(f"Overall Dice Score: {dice_score:.3f}")

# --- Plotting ---
plt.figure(figsize=(6, 4))
plt.bar(['Jaccard Index', 'Dice Score'], [jaccard_index, dice_score], color=['lightgreen', 'skyblue'])
plt.ylim(0, 1)
plt.title('Overall Jaccard Index and Dice Score', fontsize=12, fontweight='bold')
plt.ylabel('Score')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.show()

##### 🔹 Model Evaluation Metrics
Summarize the model's performance using multiple metrics: **Accuracy**, **Precision**, **Recall (Sensitivity)**, **F1 Score**, **Negative Predictive Value (NPV)**, **AUC-ROC**

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Number of classes present in the true labels
n_classes = len(np.unique(true_labels))

# Accuracy, F1, Precision, Recall (macro average for multiclass)
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='macro')
recall = recall_score(true_labels, predicted_labels, average='macro')
f1 = f1_score(true_labels, predicted_labels, average='macro')

# For binary: compute NPV and AUC.
# The AUC value is stored as `auc_roc` rather than `auc` so that it does not
# overwrite the sklearn `auc` function imported by the ROC-curve cells.
if n_classes == 2:
    cm = confusion_matrix(true_labels, predicted_labels)
    tn, fp, fn, tp = cm.ravel()
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0
    # Computed from hard class predictions, not from probabilities
    auc_roc = roc_auc_score(true_labels, predicted_labels)
else:
    npv = np.nan      # not computed here for multiclass
    auc_roc = np.nan  # not computed here for multiclass

# Store metrics in a dictionary
metrics = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'NPV': npv,
    'AUC-ROC': auc_roc
}

# Plotting (NaN entries are drawn as missing bars)
plt.figure(figsize=(12, 5))
plt.bar(list(metrics.keys()), list(metrics.values()), color=['blue', 'orange', 'green', 'red', 'purple', 'cyan'])
plt.ylabel('Score')
plt.title('Model Evaluation Metrics', fontsize=12, fontweight='bold')
plt.ylim(0, 1)
plt.axhline(y=0.5, color='grey', linestyle='--')
plt.grid(axis='y')
plt.show()

In [None]:
# List every collected metric with four decimal places
for name, score in metrics.items():
    print(f"{name}: {score:.4f}")

##### 🔥 Grad-CAM Visualization
Generate **Grad-CAM heatmaps** to visualize the regions of input images that the **ResNet50 + ECA** model focuses on for its predictions.


In [None]:
# -----------------------------
# Grad-CAM Function
# -----------------------------
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        img_array: Batch holding the image to explain; only the first element
            is used for the heatmap.
        model: Keras model containing a layer named *last_conv_layer_name*.
        last_conv_layer_name: Name of the layer whose output feature map is
            weighted by the gradients.
        pred_index: Class index to explain. Defaults to the class with the
            highest score for the first image.

    Returns:
        A NumPy array with values in [0, 1]; all zeros when the class has no
        positive evidence in the feature map.
    """
    # Model that returns both the chosen feature map and the final predictions
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel of the feature map by its pooled gradient
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. divide_no_nan returns
    # zeros instead of NaN when the map has no positive entry.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# -----------------------------
# Display Heatmap Function
# -----------------------------
def display_gradcam(img_path, heatmap, alpha=0.4):
    """Show the image at *img_path* with a Grad-CAM heatmap added on top.

    Args:
        img_path: Path of the image file; it is loaded at 224x224.
        heatmap: 2-D heatmap (assumed to be scaled to [0, 1] - TODO confirm).
        alpha: Factor applied to the heatmap before it is added to the image.
    """
    img = image.load_img(img_path, target_size=(224,224))
    img = image.img_to_array(img)

    # Scale to 0-255, replicate to three channels and resize to the image size
    heatmap = np.uint8(255 * heatmap)
    heatmap = np.expand_dims(heatmap, axis=2)
    heatmap = np.repeat(heatmap, 3, axis=2)
    heatmap = tf.image.resize(heatmap, (img.shape[0], img.shape[1])).numpy()

    # The sum can exceed 255; clip before casting so bright pixels saturate
    # instead of wrapping around in uint8.
    superimposed_img = np.clip(heatmap * alpha + img, 0, 255).astype(np.uint8)

    plt.imshow(superimposed_img)
    plt.axis('off')
    plt.show()

# -----------------------------
# Loop Through Classes
# -----------------------------
def gradcam_per_class(model, data_dir, last_conv_layer_name="cbam"):
    """Display a Grad-CAM overlay for one image of every class folder.

    Args:
        model: Trained Keras model. It is expected to apply its own input
            preprocessing, as the model built by ``create_resnet_eca`` does.
        data_dir: Directory containing one subfolder per class.
        last_conv_layer_name: Layer used for Grad-CAM. If the model has no
            layer with this name, the first layer whose name contains
            "cbam" or "eca" is used instead.

    Raises:
        ValueError: If neither the requested layer nor an attention layer
            can be found in the model.
    """
    # Fall back to the attention layer actually present in the model; the
    # ECA model names its attention layer "eca_block", not "cbam".
    layer_names = [layer.name for layer in model.layers]
    if last_conv_layer_name not in layer_names:
        attention_layers = [n for n in layer_names if "cbam" in n or "eca" in n]
        if not attention_layers:
            raise ValueError(
                f"No layer named '{last_conv_layer_name}' and no CBAM/ECA layer found in the model."
            )
        last_conv_layer_name = attention_layers[0]
        print("Using attention layer:", last_conv_layer_name)

    class_folders = sorted(os.listdir(data_dir))

    for class_name in class_folders:
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Take the alphabetically first image so the choice is reproducible
        image_files = sorted(os.listdir(class_path))
        if not image_files:
            print(f"Skipping empty class folder: {class_name}")
            continue
        img_name = image_files[0]
        img_path = os.path.join(class_path, img_name)

        # Load the image as a raw 0-255 batch. No external preprocess_input is
        # applied: the model preprocesses its input itself, and a second
        # (Inception-style) scaling would distort the pixel values.
        img = image.load_img(img_path, target_size=(224,224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # Generate Grad-CAM
        heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)

        # Plot
        print(f"Grad-CAM for class: {class_name}, image: {img_name}")
        display_gradcam(img_path, heatmap)

# -----------------------------
# Usage
# -----------------------------
# Run Grad-CAM on one image from each class subfolder of the test directory
data_dir = "/content/drive/MyDrive/sample_dataset/test"  # folder containing subfolders of each class
gradcam_per_class(model, data_dir)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# -----------------------------
# Grad-CAM Function
# -----------------------------
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        img_array: Batch holding the image to explain; only the first element
            is used for the heatmap.
        model: Keras model containing a layer named *last_conv_layer_name*.
        last_conv_layer_name: Name of the layer whose output feature map is
            weighted by the gradients.
        pred_index: Class index to explain. Defaults to the class with the
            highest score for the first image.

    Returns:
        A NumPy array with values in [0, 1]; all zeros when the class has no
        positive evidence in the feature map.
    """
    # Model that returns both the chosen feature map and the final predictions
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel of the feature map by its pooled gradient
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. divide_no_nan returns
    # zeros instead of NaN when the map has no positive entry.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# -----------------------------
# Overlay Heatmap on Image
# -----------------------------
def overlay_heatmap_on_image(img_path, heatmap, alpha=0.4):
    """Blend a jet-coloured heatmap with the image stored at *img_path*.

    The image is loaded as RGB at 224x224, the heatmap is coloured with the
    'jet' colormap and resized to the image size, and the two are mixed as
    ``alpha * heatmap + (1 - alpha) * image``.

    Returns:
        The blended picture as a PIL image.
    """
    # Original picture as an RGB array
    base_rgb = np.array(Image.open(img_path).convert("RGB").resize((224,224)))

    # Colour the heatmap with 'jet', dropping the alpha channel
    jet_rgb = matplotlib.colormaps.get_cmap('jet')(heatmap)[..., :3]
    heat_img = Image.fromarray(np.uint8(jet_rgb * 255))

    # PIL expects (width, height), i.e. the array's (rows, cols) reversed
    target_size = base_rgb.shape[:2][::-1]
    heat_rgb = np.array(heat_img.resize(target_size, Image.Resampling.LANCZOS))

    # Weighted blend of heatmap and picture
    blended = alpha * heat_rgb + (1 - alpha) * base_rgb
    return Image.fromarray(np.uint8(blended))

# -----------------------------
# Grad-CAM for Each Class
# -----------------------------
def gradcam_per_class(model, data_dir):
    """Show a Grad-CAM overlay for one image of every class folder.

    The Grad-CAM layer is the first model layer whose name contains "cbam"
    or "eca", so the function works for both attention variants.

    Args:
        model: Trained Keras model. It is expected to apply its own input
            preprocessing, as the model built by ``create_resnet_eca`` does.
        data_dir: Directory containing one subfolder per class.

    Raises:
        ValueError: If the model has no CBAM or ECA layer.
    """
    # Locate the attention layer; the ECA model names it "eca_block", so a
    # search for "cbam" alone would find nothing and fail with an IndexError.
    attention_layers = [
        layer.name for layer in model.layers
        if "cbam" in layer.name or "eca" in layer.name
    ]
    if not attention_layers:
        raise ValueError("No CBAM/ECA attention layer found in the model.")
    attention_layer_name = attention_layers[0]
    print("Using attention layer:", attention_layer_name)

    class_folders = sorted(os.listdir(data_dir))
    for class_name in class_folders:
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Pick the alphabetically first image so the choice is reproducible
        image_files = sorted(os.listdir(class_path))
        if not image_files:
            print(f"Skipping empty class folder: {class_name}")
            continue
        img_name = image_files[0]
        img_path = os.path.join(class_path, img_name)

        # Load the image as a raw 0-255 batch. No external preprocess_input is
        # applied: the model preprocesses its input itself, and a second
        # (Inception-style) scaling would distort the pixel values.
        img = image.load_img(img_path, target_size=(224,224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)

        # Compute Grad-CAM heatmap
        heatmap = make_gradcam_heatmap(img_array, model, attention_layer_name)

        # Overlay heatmap
        superimposed_img = overlay_heatmap_on_image(img_path, heatmap)

        # Display
        plt.figure(figsize=(5,5))
        plt.title(f"Class: {class_name}, Image: {img_name}")
        plt.imshow(superimposed_img)
        plt.axis('off')
        plt.show()

# -----------------------------
# Usage
# -----------------------------
# Run Grad-CAM on one image from each class subfolder of the test directory
data_dir = "/content/drive/MyDrive/sample_dataset/test"  # folder with subfolders for each class
gradcam_per_class(model, data_dir)

##### 🔹 PR-AUC (Precision-Recall AUC)
Evaluate model performance using the **area under the Precision-Recall curve**, especially useful for imbalanced datasets.

In [None]:
from sklearn.metrics import precision_recall_curve, auc
from sklearn.preprocessing import label_binarize

# Get true labels and predicted probabilities in ONE pass over the dataset so
# every label stays paired with its own prediction, even if the dataset
# reshuffles between iterations.
label_batches = []
prob_batches = []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())

y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Handle binary or multiclass
n_outputs = y_pred_probs.shape[1]
if n_outputs == 2:  # binary: use the probability of class 1
    precision, recall, _ = precision_recall_curve(y_true, y_pred_probs[:, 1])
    pr_auc = auc(recall, precision)
else:  # multiclass: one-vs-rest PR-AUC per class, keyed by class index
    pr_auc = {}
    y_true_bin = label_binarize(y_true, classes=list(range(n_outputs)))
    for i in range(n_outputs):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_pred_probs[:, i])
        pr_auc[i] = auc(recall, precision)

print("PR-AUC:", pr_auc)


##### 🔹 Log Loss / Cross-Entropy Loss
Evaluate prediction confidence using **log loss** (cross-entropy) between true and predicted probabilities.

In [None]:
from sklearn.metrics import log_loss

# Collect labels and probabilities in ONE pass so they stay aligned even if
# the dataset reshuffles between iterations.
label_batches = []
prob_batches = []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())

y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Pass the full class list explicitly so the probability columns are matched
# to class indices even when a class is absent from the test labels.
loss = log_loss(y_true, y_pred_probs, labels=list(range(y_pred_probs.shape[1])))
print("Log Loss / Cross-Entropy Loss:", loss)


##### 🔹 Top-k Accuracy
Measure if the **true label** is among the model's **top k predicted classes** in multiclass classification.

In [None]:
# Top-k accuracy for multiclass predictions
k = 3  # you can change k (choose k smaller than the number of classes for a meaningful score)

# Collect labels and probabilities in ONE pass over the dataset so every label
# stays paired with its own prediction, even if the dataset reshuffles between
# iterations (same pattern as the ROC-curve cell).
label_batches = []
prob_batches = []
for batch_images, batch_labels in test_dataset:
    prob_batches.append(model.predict(batch_images, verbose=0))
    label_batches.append(batch_labels.numpy())

y_true = np.concatenate(label_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# One-hot encode the integer labels to the width of the probability matrix.
# The binary and multiclass cases need exactly the same conversion.
y_true_cat = tf.keras.utils.to_categorical(y_true, num_classes=y_pred_probs.shape[1])

top_k_acc = tf.keras.metrics.TopKCategoricalAccuracy(k=k)
top_k_acc.update_state(y_true_cat, y_pred_probs)
print(f"Top-{k} Accuracy:", top_k_acc.result().numpy())

##### 🔹 G-Mean (Geometric Mean of Sensitivity & Specificity)
Compute the **G-Mean** to evaluate balanced classification performance.

In [None]:
from sklearn.metrics import confusion_matrix

# Gather labels and hard predictions in a single pass so they stay aligned
# even if the dataset reshuffles between iterations.
label_batches = []
pred_batches = []
for batch_images, batch_labels in test_dataset:
    batch_probs = model.predict(batch_images, verbose=0)
    pred_batches.append(np.argmax(batch_probs, axis=1))
    label_batches.append(batch_labels.numpy())

y_true = np.concatenate(label_batches, axis=0)
y_pred = np.concatenate(pred_batches, axis=0)

cm = confusion_matrix(y_true, y_pred)
if cm.shape[0] == 2:  # binary
    tn, fp, fn, tp = cm.ravel()
    # Guard the denominators the same way the multiclass branch does
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    g_mean = math.sqrt(sensitivity * specificity)
else:  # multiclass: compute G-mean per class (one-vs-rest) and average
    sensitivity_list = []
    specificity_list = []
    for i in range(cm.shape[0]):
        tp = cm[i, i]
        fn = cm[i, :].sum() - tp   # class-i samples predicted as something else
        fp = cm[:, i].sum() - tp   # other samples predicted as class i
        tn = cm.sum() - (tp + fn + fp)
        sensitivity_list.append(tp / (tp + fn) if (tp + fn) > 0 else 0)
        specificity_list.append(tn / (tn + fp) if (tn + fp) > 0 else 0)
    g_mean = np.mean(np.sqrt(np.array(sensitivity_list) * np.array(specificity_list)))

print("G-Mean:", g_mean)