<a href="https://colab.research.google.com/github/imjohnson1/imjohnson1.github.io/blob/main/MobileNet_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

MobileNetV2 Setup

# Original 3-Class MobileNet Model


Library Import & Path Mount

In [39]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


from google.colab import drive

import os

# Make the shared Drive folders available under /content/drive.
drive.mount('/content/drive')

# Root of the attacked-image dataset; listing it confirms the mount worked.
dataset_path = '/content/drive/MyDrive/Group13CST498/Iyauna_MLM/attacked_images_resplit'
dataset_entries = os.listdir(dataset_path)
print(dataset_entries)

# Folder that receives generated outputs (created if it does not exist yet).
output_dir = "/content/drive/MyDrive/Group13CST498/mNet_output"
os.makedirs(output_dir, exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
['train', 'valid', 'test']


Paths & Class Count

In [40]:
# Dataset roots on Google Drive. The data generators append "/train", "/valid"
# and "/test" to these, so each root must contain those three split folders.
clean_path = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/extracted_images_resplit"
attack_path = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/attacked_images_resplit"

# Width of the softmax output layer: one unit per class folder.
num_classes = 3   # stop, speed_limit, uncategorized

In [41]:
import os

# Root of the clean dataset on Google Drive (same folder as clean_path)
dataset_path = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/extracted_images_resplit"

# Top-level listing of the dataset folder
print("Contents of Iyauna's extracted images resplit folder:")
print(os.listdir(dataset_path))

# One level deeper: for every split folder, report how many entries each
# category folder holds. Plain files at either level are ignored.
for split in os.listdir(dataset_path):
    split_path = os.path.join(dataset_path, split)
    if not os.path.isdir(split_path):
        continue

    print(f"\n{split} folder contains:")
    category_dirs = (
        (name, os.path.join(split_path, name)) for name in os.listdir(split_path)
    )
    for category, category_path in category_dirs:
        if os.path.isdir(category_path):
            print(f"  {category}: {len(os.listdir(category_path))} images")


Contents of Iyauna's extracted images resplit folder:
['train', 'valid', 'test']

train folder contains:
  stop: 273 images
  speed_limit: 361 images
  uncategorized: 266 images

valid folder contains:
  stop: 33 images
  speed_limit: 45 images
  uncategorized: 75 images

test folder contains:
  stop: 33 images
  speed_limit: 46 images
  uncategorized: 76 images


Data Generators & Model Loading

In [42]:
# Data Generator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Training images are augmented on the fly; validation/test images are only
# passed through MobileNetV2's preprocess_input.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.7, 1.3],
    shear_range=0.2,
    horizontal_flip=False,
    fill_mode='nearest'
)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by every directory iterator.
flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical'
)

train_gen = train_datagen.flow_from_directory(clean_path + "/train", **flow_kwargs)
valid_gen = valid_datagen.flow_from_directory(clean_path + "/valid", **flow_kwargs)

# Evaluation iterators must NOT shuffle: later cells compare
# model.predict(generator) row-by-row with generator.classes and
# generator.filenames, which are stored in directory order. With the default
# shuffle=True the predictions come back in a different order and the
# confusion matrix / sorted copies would be wrong.
# Side effect: next(generator) on a fresh iterator now returns the first files
# in directory order, so a single-batch preview may show only one class.
test_gen = test_datagen.flow_from_directory(
    clean_path + "/test",
    shuffle=False,
    **flow_kwargs
)

attack_gen = test_datagen.flow_from_directory(
    attack_path + "/test",
    shuffle=False,
    **flow_kwargs
)

# Load Model Core (Pre-trained Model)
# MobileNetV2 backbone initialised with ImageNet weights. include_top=False
# leaves out the stock classifier so the custom head built in the next cell
# can be attached to base_model.output.
base_model = MobileNetV2(
    input_shape=(224, 224, 3),  # matches target_size=(224, 224) used by the generators
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # freezes base layers


Found 900 images belonging to 3 classes.
Found 153 images belonging to 3 classes.
Found 155 images belonging to 3 classes.
Found 264 images belonging to 3 classes.


Add Classification Layers

In [43]:
# Head stacked on top of the frozen backbone, applied in list order:
# pooling -> dropout -> 128-unit ReLU layer -> dropout.
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
]

x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# Softmax output with one unit per class.
preds = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=preds)


Compile model

In [44]:
# Compile the model for the first training phase (frozen backbone):
# Adam at 5e-4 with categorical cross-entropy, tracking accuracy.
phase1_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
model.compile(
    optimizer=phase1_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print the layer-by-layer overview of the assembled network.
model.summary()

Train & Fine-Tune Model

In [45]:
# -----------------------------
# Train Model
# -----------------------------
import os
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Every run writes into its own folder named after the start time,
# so earlier runs are never overwritten.
run_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
run_dir = "/content/drive/MyDrive/Group13CST498/training_runs/" + run_time
os.makedirs(run_dir, exist_ok=True)
print("Saving this run to:", run_dir)

# ------------- Prints out recent saved accuracy ----- #
class VerboseValAccCallback(tf.keras.callbacks.Callback):
    """Print a message whenever validation accuracy reaches a new best.

    The best value is stored on the instance and only initialised in
    ``__init__``, so an instance reused across several ``fit`` calls keeps
    comparing against the best value seen so far.
    """

    def __init__(self):
        super().__init__()
        self.best = -1  # sentinel meaning "no val_accuracy seen yet"

    def on_epoch_end(self, epoch, logs=None):
        """Report val_accuracy when it beats the best value so far.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict supplied by Keras; may be None or may lack
                "val_accuracy", in which case nothing is printed.
        """
        # `logs` defaults to None, so guard before calling .get on it.
        val_acc = (logs or {}).get("val_accuracy")
        if val_acc is None or val_acc <= self.best:
            return

        previous_best, self.best = self.best, val_acc
        if previous_best == -1:
            print(f"val_accuracy starts at {val_acc:.5f}")
        else:
            print(f"val_accuracy improved from {previous_best:.5f} to {val_acc:.5f}")

# CALLBACKS
# All three callbacks watch validation accuracy. The same instances are passed
# to both fit() calls below.
# NOTE(review): confirm how EarlyStopping and ModelCheckpoint carry their
# "best so far" value from the first fit() call into the second one.
callbacks = [
    # Stops training based on val_accuracy with patience=6 and
    # restore_best_weights=True.
    EarlyStopping(
        monitor='val_accuracy',
        patience=6,
        restore_best_weights=True
    ),
    # Writes the model to this run's folder; save_best_only=True keeps only
    # the best val_accuracy checkpoint.
    ModelCheckpoint(
        filepath=f"{run_dir}/best_mobilenetv2.keras",
        monitor='val_accuracy',
        save_best_only=True
    ),
    # Prints a line whenever val_accuracy improves.
    VerboseValAccCallback()
]

# ---------- MODEL.FIT START
# PHASE 1: train only the new classification head (backbone is frozen).
history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=40,
    callbacks=callbacks
)

# -----------------------------
# Unfreezes Top Layers
# -----------------------------
# Fine-tune the top MobileNetV2 layers
base_model.trainable = True

# Keep the first 50 backbone layers frozen. An earlier version froze ~100
# layers; this was lowered to 50 to make the model more flexible.
for layer in base_model.layers[:50]:
    layer.trainable = False

# Recompile so the trainable changes take effect, with a LOWER learning rate
# (1e-5 instead of the 5e-4 used when the model was first compiled).
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# -----------------------------
# PHASE 2: FINE-TUNE MODEL
# -----------------------------
# Metrics of this phase are stored separately in fine_tune_history.
fine_tune_history = model.fit(
  train_gen,
  validation_data=valid_gen,
  epochs=20, # smaller number for fine-tuning
  callbacks=callbacks
)

# -----------------------------
# Highest Accuracy Summary
# -----------------------------
# Combine both training phases. Using `history` alone would ignore every
# fine-tuning epoch, so the reported "highest" accuracy and the saved log
# would describe only phase 1.
history_df = pd.DataFrame(history.history)
fine_df = pd.DataFrame(fine_tune_history.history)
combined_df = pd.concat([history_df, fine_df], ignore_index=True)

best_train_acc = combined_df['accuracy'].max()
best_val_acc = combined_df['val_accuracy'].max()

print(f"Highest Train Accuracy: {best_train_acc * 100:.2f}%")
print(f"Highest Validation Accuracy: {best_val_acc * 100:.2f}%")

# Epoch numbers are 1-based and count across both phases
# (phase-2 epochs continue after the last phase-1 epoch).
best_epoch_train = int(combined_df['accuracy'].idxmax()) + 1
best_epoch_val = int(combined_df['val_accuracy'].idxmax()) + 1

print(f"Best Train Accuracy Epoch: {best_epoch_train}")
print(f"Best Validation Accuracy Epoch: {best_epoch_val}")

# Save summary
with open(f"{run_dir}/summary.txt", "w") as f:
    f.write(f"Highest Train Accuracy: {best_train_acc * 100:.2f}%\n")
    f.write(f"Highest Validation Accuracy: {best_val_acc * 100:.2f}%\n")
    f.write(f"Best Train Accuracy Epoch: {best_epoch_train}\n")
    f.write(f"Best Validation Accuracy Epoch: {best_epoch_val}\n")

# -----------------------------
# Save training log
# -----------------------------
# One row per epoch, phase 1 followed by phase 2.
combined_df.to_csv(f"{run_dir}/training_log.csv", index=False)

# -----------------------------
# Prediction Grid
# -----------------------------
test_gen.reset()
attack_gen.reset()

def show_colored_predictions(generator, title, filename=None):
    """Plot up to nine images from the next batch with true/predicted labels.

    Titles are green for correct predictions and red for wrong ones.

    Args:
        generator: Directory iterator yielding (images, one-hot labels) and
            exposing ``class_indices``.
        title: Figure super-title.
        filename: Optional path; when given, the figure is also saved there.
    """
    x, y = next(generator)
    preds = model.predict(x)
    pred_classes = np.argmax(preds, axis=1)
    true_classes = np.argmax(y, axis=1)
    labels = list(generator.class_indices.keys())

    # The batch holds model inputs, not display-ready pixels. imshow clips
    # float RGB data to [0, 1], so map the batch back onto that range first.
    display_batch = np.asarray(x, dtype="float32")
    if display_batch.size and display_batch.min() < 0:
        display_batch = (display_batch + 1.0) / 2.0   # [-1, 1] -> [0, 1]
    elif display_batch.size and display_batch.max() > 1.0:
        display_batch = display_batch / 255.0         # 0-255 -> [0, 1]
    display_batch = np.clip(display_batch, 0.0, 1.0)

    # The last batch of a generator can hold fewer than nine images.
    n_show = min(9, len(x))

    plt.figure(figsize=(12,12))
    for i in range(n_show):
        plt.subplot(3,3,i+1)
        plt.imshow(display_batch[i])
        correct = pred_classes[i] == true_classes[i]
        color = "green" if correct else "red"
        plt.title(
            f"True: {labels[true_classes[i]]}\nPred: {labels[pred_classes[i]]}",
            color=color
        )
        plt.axis('off')

    plt.suptitle(title, fontsize=16)

    if filename is not None:
        plt.savefig(filename)
    plt.show()

# -----------------------------
# Accuracy plot
# -----------------------------
# Plot both training phases; `history` alone would leave out every
# fine-tuning epoch.
phase1_epochs = len(history.history['accuracy'])
train_acc_curve = list(history.history['accuracy']) + list(fine_tune_history.history['accuracy'])
val_acc_curve = list(history.history['val_accuracy']) + list(fine_tune_history.history['val_accuracy'])

plt.figure(figsize=(8,5))
plt.plot(train_acc_curve, label='Train Accuracy')
plt.plot(val_acc_curve, label='Validation Accuracy')
# Dashed line between the last phase-1 epoch and the first fine-tuning epoch.
plt.axvline(phase1_epochs - 0.5, color='gray', linestyle='--', label='Fine-tuning starts')
plt.title("Training vs Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.savefig(f"{run_dir}/accuracy_plot.png")
plt.show()

# -------------------
# RELOAD SAVED MODEL
# -------------------
# Continue with the checkpoint ModelCheckpoint wrote for this run rather than
# the in-memory weights left after the last fit() call.
model = tf.keras.models.load_model(f"{run_dir}/best_mobilenetv2.keras")

# # Loss plot
# plt.figure(figsize=(8,5))
# plt.plot(history.history['loss'], label='Train Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.title("Training vs Validation Loss")
# plt.xlabel("Epoch")
# plt.ylabel("Loss")
# plt.legend()
# plt.savefig(f"{output_dir}/loss_plot.png")
# plt.show()

Saving this run to: /content/drive/MyDrive/Group13CST498/training_runs/2026-02-21_02-04-17


  self._warn_if_super_not_called()


Epoch 1/40


KeyboardInterrupt: 

In [None]:
# Replace `model` with a checkpoint stored at a fixed Drive location.
# NOTE(review): this path is not the per-run checkpoint written by the
# training cell (f"{run_dir}/best_mobilenetv2.keras"); confirm the file here
# is the model that should be evaluated by the cells below.
model = tf.keras.models.load_model('/content/drive/MyDrive/Group13CST498/best_mobilenetv2.keras')


Prediction Sorting Script

In [None]:
import os
import shutil
import numpy as np

# Destination tree: <sort_dir>/correct and <sort_dir>/incorrect
sort_dir = "/content/drive/MyDrive/Group13CST498/sorted_predictions"
correct_dir = os.path.join(sort_dir, "correct")
incorrect_dir = os.path.join(sort_dir, "incorrect")

for target_dir in (correct_dir, incorrect_dir):
    os.makedirs(target_dir, exist_ok=True)

# Start from the first batch again
test_gen.reset()

# Predict on the whole test set.
# NOTE: predictions are matched to test_gen.classes / test_gen.filenames by
# position, which is only valid when test_gen yields files in directory order
# (i.e. it was created with shuffle=False).
preds = model.predict(test_gen)
pred_classes = np.argmax(preds, axis=1)
true_classes = test_gen.classes
filenames = test_gen.filenames
labels = list(test_gen.class_indices.keys())

# Copy every test image into "correct" or "incorrect", encoding both the true
# and the predicted label in the new file name.
for fname, pred_idx, true_idx in zip(filenames, pred_classes, true_classes):
    pred_label = labels[pred_idx]
    true_label = labels[true_idx]

    src_path = os.path.join(test_gen.directory, fname)
    new_name = f"true-{true_label}_pred-{pred_label}_{os.path.basename(fname)}"

    target_dir = correct_dir if pred_label == true_label else incorrect_dir
    shutil.copy(src_path, os.path.join(target_dir, new_name))

print("Sorting complete!")
print(f"Correct predictions saved to: {correct_dir}")
print(f"Incorrect predictions saved to: {incorrect_dir}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

# -----------------------------
# 1. ACCURACY PLOT  /  2. LOSS PLOT
# -----------------------------
# Both figures cover phase 1 AND the fine-tuning phase; plotting `history`
# alone would leave out every fine-tuning epoch.
phase1_epochs = len(history.history['accuracy'])

for metric, pretty_name in (("accuracy", "Accuracy"), ("loss", "Loss")):
    train_curve = list(history.history[metric]) + list(fine_tune_history.history[metric])
    val_curve = (
        list(history.history[f"val_{metric}"])
        + list(fine_tune_history.history[f"val_{metric}"])
    )

    plt.figure(figsize=(8,5))
    plt.plot(train_curve, label=f"Train {pretty_name}")
    plt.plot(val_curve, label=f"Validation {pretty_name}")
    # Dashed line between the last phase-1 epoch and the first fine-tuning epoch.
    plt.axvline(phase1_epochs - 0.5, color='gray', linestyle='--', label='Fine-tuning starts')
    plt.title(f"Training vs Validation {pretty_name}")
    plt.xlabel("Epoch")
    plt.ylabel(pretty_name)
    plt.legend()
    plt.grid(True)
    plt.show()

# -----------------------------
# 3. CONFUSION MATRIX (TEST SET)
# -----------------------------
# Collect predictions and labels from the SAME batches. Comparing
# model.predict(test_gen) with test_gen.classes is only valid for an
# unshuffled generator; pairing them per batch is correct either way.
test_gen.reset()
batch_probs, batch_labels = [], []
for _ in range(len(test_gen)):  # len(generator) == number of batches per pass
    x_batch, y_batch = next(test_gen)
    batch_probs.append(model.predict(x_batch, verbose=0))
    batch_labels.append(np.argmax(y_batch, axis=1))

test_preds = np.concatenate(batch_probs)      # reused by the confidence histogram cell
test_labels = np.concatenate(batch_labels)
pred_classes = np.argmax(test_preds, axis=1)

class_names = list(test_gen.class_indices.keys())

# Passing `labels` keeps the matrix square even if a class never occurs.
cm = confusion_matrix(test_labels, pred_classes, labels=list(range(len(class_names))))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap='Blues', xticks_rotation=45)
plt.title("Confusion Matrix (Clean Test Set)")
plt.show()

# -----------------------------
# 4. PER-CLASS ACCURACY BAR CHART
# -----------------------------
# Row sums are the number of true samples per class; a class without samples
# gets accuracy 0 instead of a division-by-zero NaN.
samples_per_class = cm.sum(axis=1)
class_accuracy = np.divide(
    cm.diagonal(),
    samples_per_class,
    out=np.zeros(len(class_names), dtype=float),
    where=samples_per_class > 0
)

plt.figure(figsize=(8,5))
plt.bar(class_names, class_accuracy, color=['red','blue','gray'])
plt.ylim(0,1)
plt.title("Per-Class Accuracy (Clean Test Set)")
plt.ylabel("Accuracy")
plt.grid(axis='y')
plt.show()


Prediction Confidence: Clean vs Attacked Images

In [None]:
# Confidence = highest softmax probability per image.
# test_preds comes from the confusion-matrix cell; attacked predictions are
# computed here.
clean_confidence = np.max(test_preds, axis=1)
attacked_confidence = np.max(model.predict(attack_gen), axis=1)

plt.figure(figsize=(8,5))
for confidences, legend_label in (
    (clean_confidence, "Clean Images"),
    (attacked_confidence, "Attacked Images"),
):
    plt.hist(confidences, bins=20, alpha=0.7, label=legend_label)
plt.title("Prediction Confidence Distribution")
plt.xlabel("Max Softmax Probability")
plt.ylabel("Frequency")
plt.legend()
plt.grid(True)
plt.show()


Clean vs Attacked Accuracy

In [None]:
# --- CLEAN vs ATTACKED ACCURACY COMPARISON ---

# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
clean_loss, clean_acc = model.evaluate(test_gen, verbose=0)
attack_loss, attack_acc = model.evaluate(attack_gen, verbose=0)

bar_names = ["Clean Images", "Attacked Images"]
bar_heights = [clean_acc, attack_acc]

plt.figure(figsize=(6,5))
plt.bar(bar_names, bar_heights, color=["green", "orange"])
plt.ylim(0,1)
plt.title("Clean vs Attacked Accuracy")
plt.ylabel("Accuracy")
plt.grid(axis='y')
plt.show()

# Text report; the drop is clean accuracy minus attacked accuracy.
report_lines = (
    f"Clean Accuracy:   {clean_acc:.4f}",
    f"Attacked Accuracy:{attack_acc:.4f}",
    f"Accuracy Drop:    {clean_acc - attack_acc:.4f}",
)
for report_line in report_lines:
    print(report_line)


Attacked Dataset Restructure (Re-split into train/valid/test)

In [None]:
import os
import shutil
import random

# -----------------------------
# Paths
# -----------------------------
src_root = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/attacked_images_resplit"
dst_root = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/attacked_images_resplit"

classes_to_keep = ["stop", "speed_limit", "uncategorized"]
extra_class = "crosswalk"        # merged into "uncategorized"
ignore_folders = ["backup"]

splits = ["train", "valid", "test"]
split_ratios = {"train": 0.70, "valid": 0.15, "test": 0.15}
split_seed = 42                  # fixed seed -> the same split on every run

# Fail fast when source and destination are the same tree. Copying in place
# would put copies of train images into valid/test (and vice versa) while the
# originals stay where they are, leaking data between splits, and shutil.copy
# raises SameFileError as soon as a file is assigned to its current folder.
if os.path.abspath(src_root) == os.path.abspath(dst_root):
    raise ValueError(
        "src_root and dst_root point to the same folder; set dst_root to a "
        "new, empty folder before running the resplit."
    )

# -----------------------------
# Create destination folders
# -----------------------------
for split in splits:
    for cls in classes_to_keep:
        os.makedirs(os.path.join(dst_root, split, cls), exist_ok=True)

print("Created folder structure.")

# -----------------------------
# Gather ALL images from train/valid/test
# -----------------------------
all_images = {cls: [] for cls in classes_to_keep}

for split in splits:
    split_path = os.path.join(src_root, split)
    if not os.path.isdir(split_path):
        print(f"Skipping missing split folder: {split_path}")
        continue

    for cls in os.listdir(split_path):
        if cls in ignore_folders:
            continue

        cls_path = os.path.join(split_path, cls)
        if not os.path.isdir(cls_path):
            continue

        # Determine final class name
        if cls == extra_class:
            final_class = "uncategorized"
        elif cls in classes_to_keep:
            final_class = cls
        else:
            print(f"Skipping unknown class: {cls}")
            continue

        # Collect images
        all_images[final_class].extend(
            os.path.join(cls_path, img)
            for img in os.listdir(cls_path)
            if img.lower().endswith((".jpg", ".jpeg", ".png"))
        )

print("Collected all images.")

# -----------------------------
# Shuffle and split
# -----------------------------
rng = random.Random(split_seed)

for cls, imgs in all_images.items():
    # Sort first so the seeded shuffle does not depend on os.listdir order.
    imgs.sort()
    rng.shuffle(imgs)

    n = len(imgs)
    n_train = int(n * split_ratios["train"])
    n_valid = int(n * split_ratios["valid"])

    # The test split takes whatever is left after train and valid.
    assignment = {
        "train": imgs[:n_train],
        "valid": imgs[n_train:n_train + n_valid],
        "test": imgs[n_train + n_valid:],
    }

    # Copy to destination. A file whose name already exists there is skipped
    # rather than overwritten: this keeps re-runs idempotent and makes
    # basename collisions between source splits visible.
    copied = {}
    skipped = 0
    for split in splits:
        copied[split] = 0
        for src_file in assignment[split]:
            dst_file = os.path.join(dst_root, split, cls, os.path.basename(src_file))
            if os.path.exists(dst_file):
                skipped += 1
                continue
            shutil.copy(src_file, dst_file)
            copied[split] += 1

    print(f"{cls}: {copied['train']} train, {copied['valid']} valid, {copied['test']} test")
    if skipped:
        print(f"  {cls}: skipped {skipped} file(s) whose name already exists at the destination")

print("\n🎉 Attacked dataset successfully restructured!")
print("Saved to:", dst_root)


Predictions Grids (Clean + Attacked)

In [None]:
# Save both grids into this run's folder alongside the other artifacts;
# a bare file name would land in the notebook's current working directory.
prediction_grids = (
    # Clean
    (test_gen, "Clean Test Predictions", "clean_prediction_grid.png"),
    # Attacked
    (attack_gen, "Attacked Image Predictions", "attacked_prediction_grid.png"),
)

for grid_generator, grid_title, grid_file in prediction_grids:
    show_colored_predictions(
        grid_generator,
        grid_title,
        os.path.join(run_dir, grid_file)
    )


# 2-Class MobileNet Model

2-Class File Restructure

In [46]:
# import os
# import shutil
# import random

# # -----------------------------
# # Paths
# # -----------------------------
# src_root = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/extracted_images_resplit"
# dst_root = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/clean_2class"

# splits = ["train", "valid", "test"]
# classes = ["stop", "speed_limit"]

# # -----------------------------
# # Create destination folders
# # -----------------------------
# for split in splits:
#     for cls in classes:
#         os.makedirs(os.path.join(dst_root, split, cls), exist_ok=True)

# print("Created 2-class folder structure.")

# # -----------------------------
# # Copy only stop + speed_limit
# # -----------------------------
# for split in splits:
#     for cls in classes:
#         src_folder = os.path.join(src_root, split, cls)
#         dst_folder = os.path.join(dst_root, split, cls)

#         images = [
#             img for img in os.listdir(src_folder)
#             if img.lower().endswith((".jpg", ".jpeg", ".png"))
#         ]

#         for img in images:
#             shutil.copy(
#                 os.path.join(src_folder, img),
#                 os.path.join(dst_folder, img)
#             )

#         print(f"{split}/{cls}: {len(images)} images copied.")

# print("\n🎉 2-class dataset created successfully!")
# print("Saved to:", dst_root)


Created 2-class folder structure.
train/stop: 273 images copied.
train/speed_limit: 361 images copied.
valid/stop: 33 images copied.
valid/speed_limit: 45 images copied.
test/stop: 33 images copied.
test/speed_limit: 46 images copied.

🎉 2-class dataset created successfully!
Saved to: /content/drive/MyDrive/Group13CST498/Iyauna_MLM/clean_2class


Generators w/ Augmentation

In [47]:
# -----------------------------
# 2-Class Generators
# -----------------------------
two_class_path = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/clean_2class"

# The ImageNet-pretrained MobileNetV2 backbone expects inputs scaled by its
# own preprocess_input. The 3-class generators apply it; without it here the
# 2-class model would be fed raw 0-255 pixel values.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Augmentation is applied to training images only.
train_datagen = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.7, 1.3],
    shear_range=0.2,
    fill_mode='nearest'
)

# Validation and test images are only preprocessed, never augmented.
valid_datagen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)

train_gen = train_datagen.flow_from_directory(
    two_class_path + "/train",
    target_size=(224,224),
    batch_size=32,
    class_mode="categorical"
)

valid_gen = valid_datagen.flow_from_directory(
    two_class_path + "/valid",
    target_size=(224,224),
    batch_size=32,
    class_mode="categorical"
)

# shuffle=False keeps test batches in directory order so they can be matched
# to test_gen.filepaths / test_gen.classes.
test_gen = valid_datagen.flow_from_directory(
    two_class_path + "/test",
    target_size=(224,224),
    batch_size=32,
    class_mode="categorical",
    shuffle=False
)


Found 634 images belonging to 2 classes.
Found 78 images belonging to 2 classes.
Found 79 images belonging to 2 classes.


Model Definition (2-class output)

In [48]:
# -----------------------------
# 2-Class Model
# -----------------------------
# Frozen ImageNet backbone without its stock classifier.
base_model = MobileNetV2(
    input_shape=(224,224,3),
    include_top=False,
    weights="imagenet"
)
base_model.trainable = False

# Head: global average pooling -> dropout -> 2-way softmax.
pooled_features = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.2)(pooled_features)
output = Dense(2, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=output)

# Phase-1 training setup: Adam at 1e-3, categorical cross-entropy, accuracy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


Training (Phase 1 + Phase 2)

In [None]:
# -----------------------------
# Create 2-Class Runtime Folder
# -----------------------------
import os
from datetime import datetime

# Each run writes into its own folder named after the start time.
run_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
run_dir = f"/content/drive/MyDrive/Group13CST498/Iyauna_MLM/2class_runtimes/{run_time}"
os.makedirs(run_dir, exist_ok=True)

print("Saving this 2-class run to:", run_dir)

# -----------------------------
# CALLBACKS
# -----------------------------
# All three callbacks watch validation accuracy; the same instances are passed
# to both fit() calls below. VerboseValAccCallback is defined in the 3-class
# training cell, so that cell must have been run first.
callbacks = [
    EarlyStopping(
        monitor='val_accuracy',
        patience=6,
        restore_best_weights=True
    ),
    ModelCheckpoint(
        filepath=f"{run_dir}/best_2class_model.keras",   # checkpoint is stored inside this run's folder
        monitor='val_accuracy',
        save_best_only=True
    ),
    VerboseValAccCallback()
]

# -----------------------------
# Train Model (Phase 1)
# -----------------------------
# Only the new classification head is trained; the backbone is frozen.
history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=40,
    callbacks=callbacks
)

# -----------------------------
# Fine-Tuning (Phase 2)
# -----------------------------
# Unfreeze the backbone, then re-freeze its first 50 layers.
base_model.trainable = True

for layer in base_model.layers[:50]:
    layer.trainable = False

# Recompile so the trainable changes take effect, with a lower learning rate
# (1e-5 instead of the 1e-3 used when the model was first compiled).
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Metrics of this phase are stored separately in fine_tune_history.
fine_tune_history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=20,
    callbacks=callbacks
)

# -----------------------------
# Save Training Log
# -----------------------------
import pandas as pd
import matplotlib.pyplot as plt

# One row per epoch: phase 1 followed by the fine-tuning phase.
history_df = pd.DataFrame(history.history)
fine_df = pd.DataFrame(fine_tune_history.history)

combined_df = pd.concat([history_df, fine_df], ignore_index=True)
combined_df.to_csv(f"{run_dir}/training_log.csv", index=False)

print("Saved training log.")

# -----------------------------
# Accuracy Plot / Loss Plot
# -----------------------------
# Both figures share one layout, so they are produced by a single loop.
# Figures are written to the run folder and closed without being displayed.
for metric, pretty_name in (("accuracy", "Accuracy"), ("loss", "Loss")):
    plt.figure(figsize=(8,5))
    plt.plot(combined_df[metric], label=f"Train {pretty_name}")
    plt.plot(combined_df[f"val_{metric}"], label=f"Validation {pretty_name}")
    plt.title(f"Training vs Validation {pretty_name} (2-Class)")
    plt.xlabel("Epoch")
    plt.ylabel(pretty_name)
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{run_dir}/{metric}_plot.png")
    plt.close()

print("Saved accuracy and loss plots.")

# -----------------------------
# Summary File
# -----------------------------
train_acc_column = combined_df['accuracy']
val_acc_column = combined_df['val_accuracy']

best_train_acc = max(train_acc_column)
best_val_acc = max(val_acc_column)

# Row index is 0-based, reported epochs are 1-based.
best_epoch_train = train_acc_column.idxmax() + 1
best_epoch_val = val_acc_column.idxmax() + 1

summary_lines = [
    f"Highest Train Accuracy: {best_train_acc * 100:.2f}%\n",
    f"Highest Validation Accuracy: {best_val_acc * 100:.2f}%\n",
    f"Best Train Accuracy Epoch: {best_epoch_train}\n",
    f"Best Validation Accuracy Epoch: {best_epoch_val}\n",
]
with open(f"{run_dir}/summary.txt", "w") as f:
    f.writelines(summary_lines)

print("Saved summary.")


Clean Test Eval

In [None]:
# -----------------------------
# Clean Test Accuracy
# -----------------------------
# Restart the iterator at its first batch, then score the whole test split.
test_gen.reset()
clean_test_metrics = model.evaluate(test_gen)
loss, acc = clean_test_metrics  # [loss, accuracy] as compiled
print(f"Clean Test Accuracy: {acc * 100:.2f}%")


Prediction Grids + Save

In [None]:
# -----------------------------
# Raw vs Augmented Prediction Grid (2-Class)
# -----------------------------
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing import image

def show_raw_vs_augmented(generator, title, filename, num_images=9):
    """Save a grid pairing raw image files with the generator's version of them.

    For each image the left panel shows the file loaded from disk and the
    right panel shows the array produced by ``generator`` together with the
    model's prediction (green title = correct, red = wrong). The figure is
    written to ``{run_dir}/{filename}`` and closed without being displayed.

    Args:
        generator: Directory iterator yielding (images, one-hot labels) and
            exposing ``class_indices`` and ``filepaths``.
        title: Figure super-title.
        filename: File name of the saved figure inside ``run_dir``.
        num_images: Maximum number of image pairs to draw; capped at the size
            of the batch that is drawn.
    """

    def _for_display(img):
        # The generator yields model inputs, not display-ready pixels; imshow
        # clips float RGB data to [0, 1], so map the image onto that range.
        img = np.asarray(img, dtype="float32")
        if img.min() < 0:
            img = (img + 1.0) / 2.0      # [-1, 1] -> [0, 1]
        elif img.max() > 1.0:
            img = img / 255.0            # 0-255 -> [0, 1]
        return np.clip(img, 0.0, 1.0)

    # Remember where this batch starts so each image can be traced back to
    # its file even if the generator shuffles or is not at its first batch.
    batch_start = generator.batch_index * generator.batch_size

    # Get a batch from the generator
    x_aug, y_true = next(generator)
    labels = list(generator.class_indices.keys())

    n_show = min(num_images, len(x_aug))
    if n_show <= 0:
        print("No images to show.")
        return

    index_array = getattr(generator, "index_array", None)
    if index_array is not None:
        file_indices = [int(idx) for idx in index_array[batch_start:batch_start + n_show]]
    else:
        file_indices = []
    if len(file_indices) != n_show:
        # Fallback: assume the batch holds the first files in directory order.
        file_indices = list(range(n_show))

    # One forward pass for all shown images instead of one call per image.
    probs = model.predict(x_aug[:n_show], verbose=0)
    pred_classes = np.argmax(probs, axis=1)
    true_classes = np.argmax(y_true[:n_show], axis=1)

    # Three RAW/AUGMENTED pairs per row -> 6 columns; round the rows up so a
    # count that is not a multiple of three still fits.
    n_rows = -(-n_show // 3)

    plt.figure(figsize=(12, 12))

    for i in range(n_show):
        # -----------------------------
        # RAW IMAGE (left panel of the pair)
        # -----------------------------
        raw_img = image.load_img(generator.filepaths[file_indices[i]], target_size=(224, 224))
        raw_arr = image.img_to_array(raw_img).astype("uint8")

        plt.subplot(n_rows, 6, i*2 + 1)
        plt.imshow(raw_arr)
        plt.title("RAW", fontsize=10)
        plt.axis("off")

        # -----------------------------
        # GENERATOR IMAGE (right panel of the pair)
        # -----------------------------
        pred_class = pred_classes[i]
        true_class = true_classes[i]
        color = "green" if pred_class == true_class else "red"

        plt.subplot(n_rows, 6, i*2 + 2)
        plt.imshow(_for_display(x_aug[i]))
        plt.title(
            f"Pred: {labels[pred_class]}\nTrue: {labels[true_class]}",
            color=color,
            fontsize=10
        )
        plt.axis("off")

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.savefig(f"{run_dir}/{filename}")
    plt.close()

# Save raw vs augmented grid
test_gen.reset()
show_raw_vs_augmented(test_gen, "Raw vs Augmented (2-Class)", "raw_vs_augmented.png")


Evaluate Clean Test Accuracy

In [None]:
# -----------------------------
# Clean Test Accuracy
# -----------------------------
# Score the whole test split from its first batch.
test_gen.reset()
clean_test_metrics = model.evaluate(test_gen)
loss, acc = clean_test_metrics  # [loss, accuracy] as compiled
print(f"Clean Test Accuracy (2-Class): {acc * 100:.2f}%")

# Append the result to this run's summary file (mode "a" keeps earlier lines).
summary_path = f"{run_dir}/summary.txt"
with open(summary_path, "a") as f:
    f.write(f"Clean Test Accuracy: {acc * 100:.2f}%\n")


Duplicate check

In [49]:
import hashlib
import os
from collections import Counter

# -----------------------------
# Paths
# -----------------------------
root = "/content/drive/MyDrive/Group13CST498/Iyauna_MLM/clean_2class"
splits = ["train", "valid", "test"]
classes = ["stop", "speed_limit"]


def find_duplicates(root, splits, classes):
    """Find images that occur more than once anywhere in the dataset tree.

    A single folder can never hold two entries with the same name, so counting
    names inside one folder always reports "no duplicates". This check looks
    across folders instead. A file is reported when
      * another split holds a file with the same name for the same class, or
      * any other file in the tree has identical content (SHA-256 match).

    Args:
        root: Dataset root containing ``<split>/<class>`` folders.
        splits: Split folder names to scan.
        classes: Class folder names to scan inside every split.

    Returns:
        Dict mapping "split/class" to the sorted list of file names in that
        folder that have a duplicate. Missing folders map to an empty list.
    """
    records = []   # (split/class key, class, file name, content digest)
    report = {}

    for split in splits:
        for cls in classes:
            key = f"{split}/{cls}"
            report[key] = []
            folder = os.path.join(root, split, cls)
            if not os.path.isdir(folder):
                continue
            for fname in sorted(os.listdir(folder)):
                path = os.path.join(folder, fname)
                if not os.path.isfile(path):
                    continue
                with open(path, "rb") as fh:
                    digest = hashlib.sha256(fh.read()).hexdigest()
                records.append((key, cls, fname, digest))

    # Names are compared per class (the same name in two different classes is
    # not necessarily the same image); content is compared globally.
    name_counts = Counter((cls, fname) for _, cls, fname, _ in records)
    digest_counts = Counter(digest for _, _, _, digest in records)

    for key, cls, fname, digest in records:
        if name_counts[(cls, fname)] > 1 or digest_counts[digest] > 1:
            report[key].append(fname)

    return report


print("🔍 Checking for duplicate filenames...\n")

duplicate_report = find_duplicates(root, splits, classes)

for split in splits:
    for cls in classes:
        key = f"{split}/{cls}"
        duplicates = duplicate_report[key]

        if not os.path.isdir(os.path.join(root, split, cls)):
            print(f"⚠️  Folder not found, skipped: {key}\n")
        elif duplicates:
            print(f"⚠️  Duplicates found in {key}:")
            for d in duplicates:
                print(f"   - {d}")
            print()
        else:
            print(f"✅ No duplicates in {key}\n")

print("\n📄 Duplicate check complete.")


🔍 Checking for duplicate filenames...

✅ No duplicates in train/stop

✅ No duplicates in train/speed_limit

✅ No duplicates in valid/stop

✅ No duplicates in valid/speed_limit

✅ No duplicates in test/stop

✅ No duplicates in test/speed_limit


📄 Duplicate check complete.
