# CSP 571 Project by:-
# Aman Kumar A20538809
# Prachi Kotadia A20549927
# Vinay Jaikumar Gupta A20554266
# R Prem Swaroopa Nanda A20547712
# Ganapathi Subramaniam A20536260
# Title:- High-Efficiency Neural Networks for Mobile Vision Tasks

In [None]:
# Step 1: Manually download and extract PASCAL VOC 2007 in Colab.
# The archives are placed in (and unpacked into) the TFDS "manual" downloads
# directory under /root/tensorflow_datasets.
import os

manual_dir = '/root/tensorflow_datasets/downloads/manual'
os.makedirs(manual_dir, exist_ok=True)

# `-nc` (no-clobber) skips the download when the archive is already present,
# so re-running this cell does not fetch the files again.
!wget -nc http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar -P {manual_dir}
!wget -nc http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar -P {manual_dir}
# Unpack both archives next to the downloaded tar files.
!tar -xf {manual_dir}/VOCtrainval_06-Nov-2007.tar -C {manual_dir}
!tar -xf {manual_dir}/VOCtest_06-Nov-2007.tar -C {manual_dir}


# Importing the required Modules

In [None]:
from tensorflow.keras.layers import Input, Conv2D, DepthwiseConv2D, ReLU, BatchNormalization, GlobalAveragePooling2D, Dense, Add
from tensorflow.keras.models import Model
import h5py
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import os
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import seaborn as sns
from sklearn.metrics import classification_report, roc_auc_score


# Inverted Residual Block with Linear Bottleneck Implementation

In [None]:
# Inverted residual block with linear bottleneck
def inverted_residual_block(x, filters, stride, expansion):
    """Apply one inverted residual block with a linear bottleneck.

    The block is: optional 1x1 expansion -> 3x3 depthwise convolution ->
    1x1 linear projection, with an identity skip connection when the input
    and output tensors have the same shape.

    Args:
        x: Input Keras tensor; its last dimension is read as the channel count.
        filters: Number of output channels produced by the projection layer.
        stride: Stride of the depthwise convolution.
        expansion: Multiplier applied to the input channel count for the
            expansion layer. A value of 1 skips the expansion layer.

    Returns:
        The output tensor of the block.
    """
    block_input = x
    in_channels = tf.keras.backend.int_shape(block_input)[-1]

    # 1x1 pointwise expansion (skipped when there is nothing to expand).
    if expansion != 1:
        x = Conv2D(in_channels * expansion, kernel_size=1, padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = ReLU(6.)(x)

    # 3x3 depthwise convolution; this is the only layer that may downsample.
    x = DepthwiseConv2D(kernel_size=3, strides=stride, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU(6.)(x)

    # Linear bottleneck: project down to `filters` channels with no activation.
    x = Conv2D(filters, kernel_size=1, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    # The skip connection is only valid when spatial size and channel count
    # are both unchanged by the block.
    shapes_match = stride == 1 and in_channels == filters
    if not shapes_match:
        return x
    return Add()([x, block_input])

# MobileNetV2 Architecture Definition

In [None]:
# Here we have defined the Mobile Net V2 architecture
def MobileNetV2(input_shape=(224, 224, 3), num_classes=20,
                weights_path='/content/custom_mobilenetv2_imagenet_weights_for_20_classes.weights.h5',
                classifier_activation='sigmoid'):
    """Build the custom MobileNetV2 classifier and optionally load weights.

    Args:
        input_shape: Shape of one input image, (height, width, channels).
        num_classes: Number of units in the final Dense layer.
        weights_path: Path to a weights file passed to ``model.load_weights``.
            Defaults to the Colab path this notebook has always used. Pass
            ``None`` to get a freshly initialised model without touching disk.
        classifier_activation: Activation of the final Dense layer. The
            default is ``'sigmoid'`` because this notebook trains on multi-hot
            label vectors with ``binary_crossentropy`` and thresholds each
            class independently; a softmax head forces the class scores to
            sum to 1 and cannot represent several objects in one image. Pass
            ``'softmax'`` to restore the previous single-label head.

    Returns:
        A ``tf.keras`` functional ``Model`` mapping images to class scores.
    """
    inputs = Input(shape=input_shape)

    # Stem: strided 3x3 convolution.
    x = Conv2D(32, kernel_size=3, strides=2, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU(6.)(x)

    # One row per stage:
    # (expansion factor t, output channels c, first-block stride s, repeats n)
    inverted_residual_setting = [
        (1, 16, 1, 1),
        (6, 24, 2, 2),
        (6, 32, 2, 3),
        (6, 64, 2, 4),
        (6, 96, 1, 3),
        (6, 160, 2, 3),
        (6, 320, 1, 1),
    ]

    for t, c, s, n in inverted_residual_setting:
        for i in range(n):
            # Only the first block of a stage applies the stage stride.
            x = inverted_residual_block(x, c, s if i == 0 else 1, t)

    # Head: 1x1 convolution to 1280 channels, global pooling, classifier.
    x = Conv2D(1280, kernel_size=1, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU(6.)(x)
    x = GlobalAveragePooling2D()(x)
    # Keep the output layer in float32 so the scores and the loss stay
    # numerically stable when a mixed_float16 policy is active; this is a
    # no-op under the default float32 policy.
    outputs = Dense(num_classes, activation=classifier_activation, dtype='float32')(x)

    model = Model(inputs, outputs)
    if weights_path is not None:
        model.load_weights(weights_path)

    return model

# MobileNetV2 Model Creation and Summary

In [None]:
import matplotlib.pyplot as plt

# Plot raw images with their original label IDs
def plot_raw_dataset(dataset, num_images=9):
    """Show the first ``num_images`` raw examples titled with their label IDs.

    Args:
        dataset: Dataset whose elements are dicts with an ``'image'`` tensor
            and an ``'objects'`` dict holding a ``'label'`` tensor.
        num_images: How many examples to draw.
    """
    # Keep the original 3x3 layout for up to nine images, and grow the square
    # grid for larger requests so plt.subplot does not reject the index.
    grid = 3
    while grid * grid < num_images:
        grid += 1

    plt.figure(figsize=(10, 10))
    for i, example in enumerate(dataset.take(num_images)):
        plt.subplot(grid, grid, i + 1)
        # Convert the TensorFlow tensor to numpy for matplotlib.
        plt.imshow(example['image'].numpy())
        label_text = ", ".join(
            str(label_id.numpy()) for label_id in example['objects']['label']
        )
        plt.title(f"Raw labels: {label_text}", fontsize=8)
        plt.axis("off")
    plt.tight_layout()
    plt.show()


In [None]:
# Build the network with 20 outputs (one per PASCAL VOC 2007 class) and
# print its layer-by-layer summary.
model = MobileNetV2(input_shape=(224, 224, 3), num_classes=20)
model.summary()

In [None]:


# Load the train and validation splits of VOC 2007 together with the
# dataset metadata (used below for the class names).
dataset, info = tfds.load(
    'voc/2007',
    split=['train', 'validation'],
    data_dir='/root/tensorflow_datasets',  # Ensure this is the correct directory
    download=True,  # Allow TFDS to download/prepare the dataset if needed
    with_info=True,
)

# The splits come back in the order they were requested.
train_dataset, validation_dataset = dataset
label_names = info.features['objects']['label'].names

In [None]:


import matplotlib.pyplot as plt

# Plot raw images with their original label IDs
def plot_raw_dataset(dataset, num_images=9):
    """Show the first ``num_images`` raw examples titled with their label IDs.

    Args:
        dataset: Dataset whose elements are dicts with an ``'image'`` tensor
            and an ``'objects'`` dict holding a ``'label'`` tensor.
        num_images: How many examples to draw.
    """
    # Keep the original 3x3 layout for up to nine images, and grow the square
    # grid for larger requests so plt.subplot does not reject the index.
    grid = 3
    while grid * grid < num_images:
        grid += 1

    plt.figure(figsize=(10, 10))
    for i, example in enumerate(dataset.take(num_images)):
        plt.subplot(grid, grid, i + 1)
        # Convert the TensorFlow tensor to numpy for matplotlib.
        plt.imshow(example['image'].numpy())
        label_text = ", ".join(
            str(label_id.numpy()) for label_id in example['objects']['label']
        )
        plt.title(f"Raw labels: {label_text}", fontsize=8)
        plt.axis("off")
    plt.tight_layout()
    plt.show()


In [None]:

# Preview the first nine raw training images, titled with their numeric label IDs
plot_raw_dataset(train_dataset)



# Exploratory Data Visualization (Before Cleaning)

## Why plot before preprocessing?

Before applying any preprocessing (such as resizing, normalization, augmentation), it's crucial to **visualize the raw dataset** because:

- It reveals **imbalances** among object classes (important for model fairness and performance).
- It exposes **variation in image sizes**, helping to decide appropriate resizing strategies.
- It helps detect **data issues** like extremely small or large images, missing labels, etc.
- It justifies **why** certain preprocessing steps are needed (not arbitrary).


In [None]:

# Count every annotated object per class in the raw training split and
# draw the totals as a bar chart.

import collections
import matplotlib.pyplot as plt

label_names = info.features['objects']['label'].names

# One count per object annotation, so an image with three dogs adds 3.
label_counter = collections.Counter(
    int(obj)
    for example in tfds.as_numpy(train_dataset)
    for obj in example['objects']['label']
)

# Bars appear in the order the classes were first encountered.
labels = list(label_counter)
counts = [label_counter[label_id] for label_id in labels]
label_names_plot = [label_names[idx] for idx in labels]

plt.figure(figsize=(14, 7))
plt.bar(label_names_plot, counts)
plt.title('Class Label Distribution (Before Preprocessing)')
plt.xlabel('Class Label')
plt.ylabel('Number of Instances')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y')
plt.tight_layout()
plt.show()




## Class Label Distribution

- This plot shows the number of annotated object instances in the training split for each object class (an image containing several objects contributes one count per object).
- Highly **imbalanced classes** can bias the model predictions towards majority classes.


In [None]:

# Scatter the raw (height, width) of every training image before resizing.

import matplotlib.pyplot as plt

# Images are arrays whose first two dimensions are height and width.
raw_shapes = [example['image'].shape for example in tfds.as_numpy(train_dataset)]
image_heights = [shape[0] for shape in raw_shapes]
image_widths = [shape[1] for shape in raw_shapes]

plt.figure(figsize=(10,8))
plt.scatter(image_widths, image_heights, alpha=0.5)
plt.title('Image Width vs Height Distribution (Before Resizing)')
plt.xlabel('Width (pixels)')
plt.ylabel('Height (pixels)')
plt.grid(True)
plt.tight_layout()
plt.show()



## Image Width vs Height Distribution

- This scatter plot shows how image **widths** and **heights** vary across the dataset.
- Real-world datasets like VOC2007 have **non-uniform image sizes**.
- Therefore, **resizing images** to a common size (like 224×224) before feeding into CNNs is essential.


# Dataset Preprocessing with Image and Label Handling

In [None]:
# Shared training configuration used by the preprocessing and training cells.
BATCH_SIZE = 32  # examples per batch in prepare_dataset
EPOCHS = 30  # upper bound on training epochs passed to model.fit
IMAGE_SIZE = 224  # images are resized to IMAGE_SIZE x IMAGE_SIZE
AUTOTUNE = tf.data.AUTOTUNE  # let tf.data choose parallelism / prefetch depth

#data preprocessing block
def preprocess_data(example, label_names):
    """Turn one raw VOC example into a normalised image and a multi-hot label.

    Args:
        example: Dict with an ``'image'`` tensor and an ``'objects'`` dict
            holding the per-object ``'label'`` IDs.
        label_names: Sequence of class names; only its length is used, as the
            size of the label vector.

    Returns:
        ``(image, labels)`` where ``image`` is float32, resized to
        ``IMAGE_SIZE x IMAGE_SIZE`` and scaled to ``[-1, 1]``, and ``labels``
        is a float32 vector with 1.0 at every class that appears in the image
        (a multi-hot vector, not a strict one-hot).
    """
    # Image: cast, resize, then map [0, 255] to [-1, 1].
    image = tf.cast(example['image'], tf.float32)
    image = tf.image.resize(image, [IMAGE_SIZE, IMAGE_SIZE])
    image = (image / 127.5) - 1

    # Labels: one-hot encode every object at once and collapse over objects.
    # A class is marked 1.0 if any object has it; repeated objects of the same
    # class still give 1.0, and an example with no objects gives all zeros.
    label_ids = example['objects']['label']
    per_object = tf.one_hot(label_ids, depth=len(label_names), dtype=tf.float32)
    labels = tf.cast(tf.reduce_sum(per_object, axis=0) > 0, tf.float32)

    return image, labels

In [None]:

import matplotlib.pyplot as plt
import tensorflow as tf

# Helper: convert one-hot label vector back to class names
def decode_one_hot_labels(one_hot_tensor, label_names):
    """Return the class names whose entry in ``one_hot_tensor`` equals 1.0.

    Args:
        one_hot_tensor: Label vector with 1.0 at each active class.
        label_names: Class names indexed by class ID.

    Returns:
        List of class names, in increasing class-ID order.
    """
    active_positions = tf.where(one_hot_tensor == 1.0)
    decoded = []
    for position in active_positions:
        # Each row of tf.where holds the coordinates of one match; the first
        # coordinate is the class index.
        decoded.append(label_names[position[0].numpy()])
    return decoded

# Plot preprocessed dataset
def plot_preprocessed_dataset(dataset, label_names, num_images=9):
    """Show preprocessed images titled with their decoded class names.

    Args:
        dataset: Dataset yielding ``(image, labels)`` pairs where the image is
            scaled to ``[-1, 1]`` and ``labels`` is a multi-hot vector.
        label_names: Class names indexed by class ID.
        num_images: How many examples to draw.
    """
    # Keep the original 3x3 layout for up to nine images, and grow the square
    # grid for larger requests so plt.subplot does not reject the index.
    grid = 3
    while grid * grid < num_images:
        grid += 1

    plt.figure(figsize=(10, 10))
    for i, (image, labels) in enumerate(dataset.take(num_images)):
        plt.subplot(grid, grid, i + 1)
        image = tf.squeeze(image)  # remove batch dimension if present
        plt.imshow((image + 1) / 2)  # scale image from [-1, 1] back to [0, 1]
        label_names_list = decode_one_hot_labels(labels, label_names)
        plt.title(", ".join(label_names_list), fontsize=8)
        plt.axis("off")
    plt.tight_layout()
    plt.show()


In [None]:

# Apply preprocessing function to the dataset.
# The result is left unbatched: each element is a single (image, labels) pair,
# which is the form plot_preprocessed_dataset iterates over.
preprocessed_dataset = train_dataset.map(lambda x: preprocess_data(x, label_names))

# Plot the results
plot_preprocessed_dataset(preprocessed_dataset, label_names)


# Data Augmentation for Improved Generalization and Robustness

In [None]:
# performs data augmentation on the input images to improve model generalization and robustness
def apply_augmentation(image, label):
    """Randomly flip and jitter brightness/contrast of one image.

    Args:
        image: Image tensor; the result is clipped to ``[-1, 1]``.
        label: Label passed through unchanged.

    Returns:
        ``(augmented_image, label)``.
    """
    flipped = tf.image.random_flip_left_right(image)
    brightened = tf.image.random_brightness(flipped, max_delta=0.2)
    contrasted = tf.image.random_contrast(brightened, lower=0.8, upper=1.2)
    # Brightness/contrast shifts can push values outside the normalised range.
    return tf.clip_by_value(contrasted, -1, 1), label

In [None]:

import matplotlib.pyplot as plt

# Function to show side-by-side comparison: original vs augmented
def plot_original_vs_augmented(dataset, num_images=5):
    """Draw each image next to a freshly augmented copy of itself.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs with images scaled
            to ``[-1, 1]``.
        num_images: Number of rows (original/augmented pairs) to draw.
    """
    plt.figure(figsize=(12, 6))
    for row, (image, label) in enumerate(dataset.take(num_images)):
        augmented_image, _ = apply_augmentation(image, label)
        panels = (("Original", image), ("Augmented", augmented_image))

        # Left column is the original, right column the augmented version.
        for col, (caption, picture) in enumerate(panels, start=1):
            plt.subplot(num_images, 2, 2 * row + col)
            plt.imshow(((picture + 1) / 2).numpy())  # [-1, 1] -> [0, 1]
            plt.title(caption, fontsize=10)
            plt.axis("off")

    plt.tight_layout()
    plt.show()


In [None]:


# Show each preprocessed training image beside a randomly augmented copy,
# to illustrate the variation the augmentation step introduces.
plot_original_vs_augmented(preprocessed_dataset)


# Dataset Preparation with Preprocessing, Augmentation, and Shuffling

In [None]:
# Builds the input pipeline: preprocessing, optional shuffle + augmentation,
# then batching and prefetching.
def prepare_dataset(dataset, label_names, is_training=True):
    """Turn a raw VOC split into batches of (image, labels).

    Args:
        dataset: Raw dataset of example dicts accepted by ``preprocess_data``.
        label_names: Class names forwarded to ``preprocess_data``.
        is_training: When true, shuffle the examples and apply
            ``apply_augmentation``; otherwise leave order and pixels as-is.

    Returns:
        A batched, prefetched dataset.
    """
    pipeline = dataset.map(
        lambda example: preprocess_data(example, label_names),
        num_parallel_calls=AUTOTUNE,
    )
    if is_training:
        pipeline = pipeline.shuffle(10000).map(
            apply_augmentation, num_parallel_calls=AUTOTUNE
        )
    return pipeline.batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [None]:


def compare_dataset_pipeline_fixed(raw_ds, label_names, num_images=5):
    """Plot the same images at the raw, preprocessed and augmented stages.

    Args:
        raw_ds: Raw dataset of example dicts.
        label_names: Class names indexed by class ID.
        num_images: Number of rows (examples) to draw.
    """
    # Materialise the examples once so all three columns show the same image.
    raw_examples = list(raw_ds.take(num_images))

    plt.figure(figsize=(15, num_images * 3))

    for row, raw_example in enumerate(raw_examples):
        pre_image, pre_labels = preprocess_data(raw_example, label_names)
        aug_image, aug_labels = apply_augmentation(pre_image, pre_labels)

        raw_ids = ", ".join(str(l.numpy()) for l in raw_example['objects']['label'])
        pre_names = decode_one_hot_labels(pre_labels, label_names)
        aug_names = decode_one_hot_labels(aug_labels, label_names)

        # (pixels to show, panel title) for the three columns, left to right.
        panels = [
            (raw_example['image'].numpy(), f"Raw\nLabels: {raw_ids}"),
            (((pre_image + 1) / 2).numpy(), "Preprocessed\n" + ", ".join(pre_names)),
            (((aug_image + 1) / 2).numpy(), "Prepared (Augmented)\n" + ", ".join(aug_names)),
        ]
        for col, (pixels, caption) in enumerate(panels, start=1):
            plt.subplot(num_images, 3, 3 * row + col)
            plt.imshow(pixels)
            plt.title(caption, fontsize=8)
            plt.axis("off")

    plt.tight_layout()
    plt.show()



In [None]:

# Compare the same training images through the raw → preprocessed → augmented stages
compare_dataset_pipeline_fixed(train_dataset, label_names)


In [None]:
def label_preprocessing(image, labels):
    """Resize an image to 224x224 and scale it to ``[0, 1]``.

    Note that this uses a different value range from ``preprocess_data``,
    which scales to ``[-1, 1]``.

    Args:
        image: Image tensor with pixel values on a 0-255 scale.
        labels: Labels passed through unchanged.

    Returns:
        ``(scaled_image, labels)``.
    """
    resized = tf.image.resize(image, (224, 224))
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, labels

In [None]:
def data_augmentation(image, labels):
    """Apply a random flip and mild brightness/contrast jitter.

    Unlike ``apply_augmentation`` the result is not clipped afterwards.

    Args:
        image: Image tensor.
        labels: Labels passed through unchanged.

    Returns:
        ``(augmented_image, labels)``.
    """
    flipped = tf.image.random_flip_left_right(image)
    brightened = tf.image.random_brightness(flipped, max_delta=0.1)
    jittered = tf.image.random_contrast(brightened, lower=0.9, upper=1.1)
    return jittered, labels


In [None]:
# 1. Preprocessing: resize/scale the image, keep the raw per-object label IDs
train_dataset_processed = train_dataset.map(
    lambda x: label_preprocessing(x['image'], x['objects']['label'])
)

# 2. Augmentation
train_dataset_augmented = train_dataset_processed.map(data_augmentation)

# 3. No batch, no prefetch here! This unbatched view is what the plots use.
final_dataset_for_plotting = train_dataset_augmented

# Batch and prefetch separately for training
# NOTE(review): the labels here are the raw per-object ID lists, whose length
# presumably varies per image; plain .batch(32) is likely to fail on them if
# this dataset is ever iterated — confirm before using it for training.
final_dataset = (
    final_dataset_for_plotting
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)



In [None]:
# After Preprocessing: Plot Label Distribution

import collections
import matplotlib.pyplot as plt

label_counter_post = collections.Counter()

# Loop through the normalized, multi-hot preprocessed dataset.
# Each label vector holds at most one 1 per class, so this counts the number
# of IMAGES containing a class, not the number of object instances.
for image, labels in tfds.as_numpy(preprocessed_dataset):
    active_classes = np.where(labels == 1)[0]
    # Plain ints keep the counter keys consistent with the pre-cleaning plot.
    label_counter_post.update(int(idx) for idx in active_classes)

# Prepare plotting
labels_post = list(label_counter_post.keys())
counts_post = list(label_counter_post.values())
label_names_plot_post = [label_names[idx] for idx in labels_post]

plt.figure(figsize=(14, 7))
plt.bar(label_names_plot_post, counts_post, color='skyblue')
plt.title('Class Label Distribution (After Normalization and One-Hot Encoding)')
plt.xlabel('Class Label')
# The axis label now matches what is actually counted (see note above).
plt.ylabel('Number of Images')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y')
plt.tight_layout()
plt.show()

In [None]:
image_widths_post = []
image_heights_post = []

# Limit to first 500 samples for plotting. Every image has already been
# resized to 224x224 by label_preprocessing, so a subset shows the same
# picture as the full split at a fraction of the cost.
for image, _labels in tfds.as_numpy(final_dataset_for_plotting.take(500)):
    height, width, _ = image.shape
    image_widths_post.append(width)
    image_heights_post.append(height)

plt.figure(figsize=(10,8))
plt.scatter(image_widths_post, image_heights_post, alpha=0.5, color='orange')
plt.title('Image Width vs Height Distribution (After Resizing)')
plt.xlabel('Width (pixels)')
plt.ylabel('Height (pixels)')
plt.grid(True)
plt.tight_layout()
plt.show()

# Model Training

In [None]:
def main():
    """Train the custom MobileNetV2 on PASCAL VOC 2007 and save the result.

    Loads the train/validation splits, builds the input pipelines, compiles
    the model for multi-label classification, trains with checkpointing,
    early stopping and learning-rate reduction, and writes the best and the
    final model into ``model_output``.

    Returns:
        ``(history, train_dataset, label_names)``: the History object from
        ``model.fit``, the prepared (batched) training dataset, and the list
        of class names.
    """
    output_dir = 'model_output'
    os.makedirs(output_dir, exist_ok=True)

    # Load both splits plus metadata; download=True lets TFDS fetch/prepare
    # the dataset when it is not already present in data_dir.
    (raw_train, raw_validation), info = tfds.load(
        'voc/2007',
        split=['train', 'validation'],
        data_dir='/root/tensorflow_datasets',
        download=True,
        with_info=True,
    )
    label_names = info.features['objects']['label'].names

    # Only the training pipeline is shuffled and augmented.
    train_dataset = prepare_dataset(raw_train, label_names, is_training=True)
    validation_dataset = prepare_dataset(raw_validation, label_names, is_training=False)

    print("Creating model...")
    model = MobileNetV2(
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        num_classes=len(label_names),
    )

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
        ],
    )

    # Keep the best model on disk, judged by validation accuracy.
    best_checkpoint = ModelCheckpoint(
        os.path.join(output_dir, 'mobilenetv2_pascal_best.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1,
    )
    # Stop after 7 epochs without improvement and roll back to the best weights.
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        patience=7,
        restore_best_weights=True,
        verbose=1,
    )
    # Cut the learning rate to 20% when validation loss stalls for 2 epochs.
    lr_reducer = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=2,
        min_lr=1e-6,
        verbose=1,
    )

    print("\nStarting training...")
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=EPOCHS,
        callbacks=[best_checkpoint, early_stopping, lr_reducer],
        verbose=1,
    )

    model.save(os.path.join(output_dir, 'mobilenetv2_pascal_final.keras'))

    return history, train_dataset, label_names


# Run the main function with mixed precision.
# The global policy is set before main() is called, so the model built inside
# main() is created under it.
# NOTE(review): the module-level `model` created earlier in the notebook was
# built before this policy was set — confirm that is intended.
if __name__ == "__main__":
    policy = tf.keras.mixed_precision.Policy('mixed_float16')
    tf.keras.mixed_precision.set_global_policy(policy)

    # This rebinds the global `train_dataset` to the batched, augmented
    # pipeline returned by main(), replacing the raw split loaded earlier.
    history, train_dataset, label_names = main()



# Model Predictions with Actual Labels function:-

In [None]:
def display_more_predictions(model, dataset, label_names, num_images=15, threshold=0.3):
    """Show images with their true labels and the model's predicted labels.

    Args:
        model: Model exposing ``predict``; returns one score per class.
        dataset: Batched dataset yielding ``(images, labels)`` with images
            scaled to ``[-1, 1]`` and multi-hot label rows.
        label_names: Class names indexed by class ID.
        num_images: Maximum number of images to display.
        threshold: A class is reported as predicted when its score exceeds
            this value.
    """
    shown = 0

    for images, labels in dataset:
        predictions = model.predict(images)

        for image, truth, scores in zip(images, labels, predictions):
            # Stop as soon as the requested number of figures has been drawn.
            if shown >= num_images:
                return

            plt.figure(figsize=(5, 5))
            plt.imshow((image + 1) / 2)
            plt.axis('off')

            actual_labels = [
                name for j, name in enumerate(label_names) if truth[j] == 1
            ]
            predicted_labels = [
                name for j, name in enumerate(label_names) if scores[j] > threshold
            ]
            # Nothing cleared the threshold: say so instead of an empty title.
            if not predicted_labels:
                predicted_labels = ["No confident prediction"]

            plt.title(f"Actual: {', '.join(actual_labels)}\nPredicted: {', '.join(predicted_labels)}")
            plt.show()

            shown += 1

# Test Dataset Preparation and Display the Model Predictions

In [None]:
# Load and prepare the test dataset.
# download=False: the dataset must already be prepared under data_dir.
test_dataset, info = tfds.load(
    'voc/2007',
    data_dir='/root/tensorflow_datasets',
    download=False,
    split='test',
    with_info=True
)

# Get the class names from the dataset metadata
label_names = info.features['objects']['label'].names

# Prepare the test dataset (no shuffling or augmentation)
test_dataset = prepare_dataset(test_dataset, label_names=label_names, is_training=False)

# Load the trained model (best checkpoint) to evaluate on the test dataset.
# This rebinds the global `model` to the model stored on disk.
model_path = '/content/model_output/mobilenetv2_pascal_best.keras'
model = tf.keras.models.load_model(model_path)

# Display predictions on more test images
display_more_predictions(model, test_dataset, label_names=label_names, num_images=15, threshold=0.3)

# Evaluation of Model Metrics

In [None]:
from tqdm import tqdm

# Class names for the test split, read from the TFDS metadata object `info`
label_names = info.features['objects']['label'].names

# Evaluation metrics
def evaluate_metrics_with_progress_bar(model, dataset, label_names, threshold=0.5):
    """Evaluate a multi-label classifier over a batched dataset.

    Args:
        model: Callable model; ``model(images, training=False)`` must return
            one score per class for every image.
        dataset: Batched dataset yielding ``(images, labels)`` with multi-hot
            label rows.
        label_names: Class names, one per label column.
        threshold: Scores strictly above this value count as a positive
            prediction in the classification report.

    Returns:
        ``(report, auc_scores, accuracies)`` where

        * ``report`` is the dict produced by ``classification_report`` on the
          thresholded predictions,
        * ``auc_scores`` has one ROC-AUC per class computed from the raw
          scores, or ``None`` where it is undefined (the class column is
          missing, or the ground truth for that class is all 0 or all 1),
        * ``accuracies`` has one value per batch: the fraction of images whose
          highest-scoring class equals the lowest-index true class.
    """
    y_true = []
    y_pred = []
    y_scores = []
    accuracies = []

    for images, labels in tqdm(dataset, desc="Evaluating metrics", unit="batch"):
        # Keep the raw scores: ROC-AUC needs a ranking, not 0/1 decisions.
        # Cast to float32 because the model may emit float16 under a
        # mixed-precision policy.
        scores = model(images, training=False).numpy().astype(np.float32)
        truth = labels.numpy()
        decisions = (scores > threshold).astype(np.int32)

        # Top-1 accuracy from the raw scores. Taking argmax of the thresholded
        # vector would report class 0 whenever no score clears the threshold.
        batch_accuracy = np.mean(
            np.equal(np.argmax(scores, axis=-1), np.argmax(truth, axis=-1))
        )
        accuracies.append(batch_accuracy)

        y_true.append(truth)
        y_pred.append(decisions)
        y_scores.append(scores)

    # Stack the per-batch arrays into (num_examples, num_classes) matrices.
    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)
    y_scores = np.concatenate(y_scores, axis=0)

    report = classification_report(y_true, y_pred, target_names=label_names, output_dict=True, zero_division=1)

    # Per-class ROC-AUC. roc_auc_score raises when only one class value is
    # present in the ground truth, so report None for those columns instead.
    auc_scores = []
    for i in range(len(label_names)):
        if i >= y_true.shape[1] or np.unique(y_true[:, i]).size < 2:
            auc_scores.append(None)
        else:
            auc_scores.append(roc_auc_score(y_true[:, i], y_scores[:, i]))

    return report, auc_scores, accuracies

# Generate Model Evaluation Report and AUC Scores

In [None]:
from tqdm import tqdm

# Run the evaluation over the prepared test dataset with the default threshold
report, auc_scores, accuracies = evaluate_metrics_with_progress_bar(model, test_dataset, label_names)

# Display plots for Precision, Recall and F-1 Score per class:-

In [None]:
# Bar charts of precision, recall and F1 per class
def plot_classification_report(report, label_names):
    """Plot per-class precision, recall and F1-score side by side.

    Args:
        report: Dict keyed by class name whose values are dicts containing
            ``'precision'``, ``'recall'`` and ``'f1-score'``.
        label_names: Class names to plot, in display order.
    """
    metrics = ['precision', 'recall', 'f1-score']
    data = {
        metric: [report[class_name][metric] for class_name in label_names]
        for metric in metrics
    }

    # One subplot per metric, all sharing the same class ordering.
    fig, axes = plt.subplots(1, len(metrics), figsize=(18, 5))
    tick_positions = range(len(label_names))
    for axis, metric in zip(axes, metrics):
        heading = metric.capitalize()
        axis.bar(label_names, data[metric], color='skyblue')
        axis.set_title(f'{heading} per Class')
        axis.set_xticks(tick_positions)
        axis.set_xticklabels(label_names, rotation=45, ha="right")
        axis.set_xlabel('Classes')
        axis.set_ylabel(f'{heading}')
    plt.tight_layout()
    plt.show()


plot_classification_report(report, label_names)

# Visualization of AUC Scores for Each Class

In [None]:
def plot_auc_scores(auc_scores, label_names):
    """Plot one ROC-AUC bar per class.

    Args:
        auc_scores: One AUC per class; an entry may be ``None`` when the AUC
            could not be computed for that class.
        label_names: Class names, aligned with ``auc_scores``.
    """
    # matplotlib cannot draw None; NaN leaves an empty slot for that class
    # while keeping the remaining bars aligned with their labels.
    heights = [np.nan if score is None else score for score in auc_scores]

    plt.figure(figsize=(10, 6))
    plt.bar(label_names, heights, color='lightcoral')
    plt.title('AUC Scores per Class')
    plt.xticks(rotation=45, ha="right")
    plt.xlabel('Classes')
    plt.ylabel('AUC')
    plt.tight_layout()
    plt.show()

plot_auc_scores(auc_scores, label_names)

# Plotting Model Accuracy per Batch

In [None]:
def plot_accuracy(accuracies):
    """Plot the accuracy measured on each evaluation batch as a line.

    Args:
        accuracies: Sequence with one accuracy value per batch, in batch order.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(accuracies, color='green', label='Accuracy per Batch')
    plt.title('Model Accuracy per Batch')
    plt.xlabel('Batch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.tight_layout()
    plt.show()

plot_accuracy(accuracies)

# Precision, Recall, F-1 and AUC scores for all classes:-

In [None]:
# Print every entry of the classification report, then the AUC of each class
print("\nFinal Classification Report:")
for class_name, metrics in report.items():
    print(f"{class_name}: {metrics}")

print("\nAUC Scores:")
for idx, auc in enumerate(auc_scores):
    # A missing AUC is reported in words rather than formatted as a number.
    auc_text = f"{auc:.4f}" if auc is not None else "Not computable"
    print(f"AUC for {label_names[idx]}: {auc_text}")

# Mean Intersection over Union (mIOU)

In [None]:
from tqdm import tqdm

# Mean IoU over the arg-max class of each image, with a progress bar
def calculate_mean_iou(model, dataset, num_classes):
    """Compute Keras ``MeanIoU`` between arg-max predictions and labels.

    Each image is reduced to a single class on both sides: the index of the
    highest model score and the index of the largest label entry.

    Args:
        model: Model exposing ``predict``.
        dataset: Batched dataset yielding ``(images, labels)``.
        num_classes: Number of classes tracked by the metric.

    Returns:
        The mean IoU as a numpy scalar (also printed).
    """
    miou_metric = tf.keras.metrics.MeanIoU(num_classes=num_classes)

    for images, labels in tqdm(dataset, desc="Processing batches", unit="batch"):
        predictions = model.predict(images, verbose=0)
        miou_metric.update_state(
            tf.argmax(labels, axis=-1),       # true labels
            tf.argmax(predictions, axis=-1),  # predicted labels
        )

    mean_iou = miou_metric.result().numpy()
    print(f"Mean Intersection over Union (mIOU): {mean_iou:.4f}")
    return mean_iou

# Evaluate on the prepared test dataset
num_classes = 20
mean_iou = calculate_mean_iou(model, test_dataset, num_classes)


# Calculating Mean Intersection over Union (mIOU) and Per-Class mIOU values

In [None]:
from tqdm import tqdm

# mIOU per class
def calculate_mean_iou_per_class(model, dataset, num_classes, label_names=None):
    """Compute per-class and mean IoU from arg-max predictions and labels.

    Each image is reduced to a single class on both sides (arg-max of the
    scores and arg-max of the label vector); the IoU of a class is then
    ``TP / (TP + FP + FN)`` taken from the resulting confusion matrix.

    Args:
        model: Model exposing ``predict``.
        dataset: Batched dataset yielding ``(images, labels)``.
        num_classes: Size of the confusion matrix.
        label_names: Optional display names, one per class. When omitted the
            module-level ``class_names`` list is used, as before.

    Returns:
        ``(mean_iou, iou_per_class)``: a numpy float and a float array of
        length ``num_classes``.
    """
    # Rows are true classes, columns are predicted classes.
    confusion = np.zeros((num_classes, num_classes), dtype=np.int64)

    for images, labels in tqdm(dataset, desc="Processing batches", unit="batch"):
        predictions = model.predict(images, verbose=0)
        predicted_labels = np.argmax(predictions, axis=-1).reshape(-1)
        true_labels = np.argmax(np.asarray(labels), axis=-1).reshape(-1)

        # Accumulate in NumPy: updating a TensorFlow tensor once per sample
        # creates a new tensor for every image and is needlessly slow.
        for true, pred in zip(true_labels, predicted_labels):
            if true < num_classes and pred < num_classes:  # Ensure labels are valid
                confusion[true, pred] += 1

    intersection = np.diag(confusion)
    union = confusion.sum(axis=0) + confusion.sum(axis=1) - intersection
    # A class that never occurs has union 0; dividing by 1 gives it IoU 0.
    iou_per_class = intersection / np.maximum(union, 1)

    mean_iou = iou_per_class.mean()

    # Resolve the display names only now, so the fallback to the global list
    # happens at the same point as it did before.
    names = class_names if label_names is None else label_names

    print(f"Mean Intersection over Union (mIOU): {mean_iou:.4f}")
    print("Per-Class mIOU:")
    for i, iou in enumerate(iou_per_class):
        # Tolerate a name list shorter than num_classes.
        name = names[i] if i < len(names) else f"Class {i}"
        print(f"{name}: mIOU = {iou:.4f}")

    return mean_iou, iou_per_class


# 20 classes
num_classes = 20
# Human-readable display names, one per class index.
# NOTE(review): these are assumed to be in the same order as the TFDS
# `label_names` used elsewhere in the notebook — confirm.
class_names = [
    "Aeroplane", "Bicycle", "Bird", "Boat", "Bottle", "Bus", "Car", "Cat", "Chair", "Cow",
    "Dining Table", "Dog", "Horse", "Motorbike", "Person", "Potted Plant", "Sheep", "Sofa", "Train", "TV Monitor"
]

# Compute the mean and per-class IoU on the prepared test dataset
mean_iou, iou_per_class = calculate_mean_iou_per_class(model, test_dataset, num_classes)

# Plotting Per-Class Mean Intersection over Union (mIOU) Scores

In [None]:
# Bar chart of the IoU obtained for each class
def plot_per_class_miou(iou_per_class, class_names):
    """Plot one IoU bar per class on a fixed 0-1 scale.

    Args:
        iou_per_class: IoU values, one per class.
        class_names: Display names aligned with ``iou_per_class``.
    """
    plt.figure(figsize=(10, 6))
    plt.bar(class_names, iou_per_class, color="blue")
    plt.title("Per-Class Mean Intersection over Union (mIOU)")
    plt.xlabel("Classes")
    plt.ylabel("mIOU Score")
    # IoU is a ratio, so pin the axis to its full range for comparability.
    plt.ylim(0, 1)
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()

# Plot the per-class mIOU scores
plot_per_class_miou(iou_per_class, class_names)