In [None]:
# William Goggins - S00248401

import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Paths
gdrive_csv_path = "/content/drive/MyDrive/Colab_Files/Test.csv"
content_csv_path = "/content/ML_CSV_Files/Test.csv"
organised_test_csv_path = "/content/ML_CSV_Files/organised_test.csv"
original_train_path = "/root/.cache/kagglehub/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign/versions/1/Train"
filtered_train_path = "/content/Train_All_Categories"
original_test_path = "/root/.cache/kagglehub/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign/versions/1/Test"
organised_test_path = "/content/Test_Organised"

# Setup and Check Google Drive for Test.csv
def setup_test_csv():
    """Mount Google Drive and copy Test.csv into the local CSV directory.

    The file is read from ``gdrive_csv_path`` and written to
    ``content_csv_path``; the destination directory is created if needed.

    Raises:
        FileNotFoundError: If no file exists at ``gdrive_csv_path``.
    """
    print("[INFO] Mounting Google Drive...")
    # Imported here rather than at module level, so the module can be
    # imported without google.colab being available.
    from google.colab import drive
    drive.mount('/content/drive')

    print("[INFO] Google Drive mounted successfully.")

    # The local target directory must exist before the copy.
    destination_dir = os.path.dirname(content_csv_path)
    os.makedirs(destination_dir, exist_ok=True)

    # Fail loudly when the source CSV is missing from Drive.
    if not os.path.exists(gdrive_csv_path):
        raise FileNotFoundError(f"[ERROR] Test.csv not found at {gdrive_csv_path}. Please ensure it exists.")

    shutil.copy(gdrive_csv_path, content_csv_path)
    print(f"[SUCCESS] Test.csv copied to {content_csv_path}.")

# Save Organised Test CSV
def save_organised_test_csv():
    """Re-save the local Test.csv as the organised test CSV.

    Reads ``content_csv_path`` with pandas and writes the same rows to
    ``organised_test_csv_path`` without the DataFrame index column.
    """
    print("[INFO] Saving organised test CSV...")
    # Round-trip through pandas; no rows or columns are filtered here.
    pd.read_csv(content_csv_path).to_csv(organised_test_csv_path, index=False)
    print(f"[SUCCESS] Organised test CSV saved to {organised_test_csv_path}.")

# Download the Kaggle dataset if needed, then copy training data for all classes
def setup_kaggle_dataset():
    """Download the GTSRB Kaggle dataset unless it is already present.

    Presence is judged by the parent directory of ``original_train_path``.
    NOTE(review): the download target is not passed to kagglehub; this
    assumes its default cache location matches ``original_train_path`` --
    confirm if the kagglehub cache directory is ever customised.
    """
    print("[INFO] Checking for Kaggle dataset...")
    dataset_root = os.path.dirname(original_train_path)

    if os.path.exists(dataset_root):
        print("[INFO] Kaggle dataset already exists.")
        return

    print("[INFO] Kaggle dataset not found. Downloading...")
    # Imported only when a download is actually required.
    import kagglehub
    kagglehub.dataset_download("meowmeowmeowmeowmeow/gtsrb-german-traffic-sign")
    print("[SUCCESS] Kaggle dataset downloaded successfully.")

def filter_train_data():
    """Copy every class directory of the training set to ``filtered_train_path``.

    Ensures the Kaggle dataset is available first, then copies each
    sub-directory of ``original_train_path`` (one per class) into
    ``filtered_train_path``. Classes whose destination directory already
    exists are left untouched, so the function can be re-run safely.

    Entries of ``original_train_path`` that are not directories are skipped
    with a warning instead of being passed to ``shutil.copytree``, which only
    accepts directories as its source.
    """
    print("[INFO] Filtering training data for all categories...")
    setup_kaggle_dataset()
    os.makedirs(filtered_train_path, exist_ok=True)

    for class_id in sorted(os.listdir(original_train_path)):
        src_class_dir = os.path.join(original_train_path, class_id)
        dst_class_dir = os.path.join(filtered_train_path, class_id)

        # Names come from os.listdir, so they always exist; what matters is
        # whether the entry is really a class directory.
        if not os.path.isdir(src_class_dir):
            print(f"[WARNING] Skipping non-directory entry: {src_class_dir}")
            continue

        # Skip classes copied by a previous run.
        if not os.path.exists(dst_class_dir):
            shutil.copytree(src_class_dir, dst_class_dir)

    print("[SUCCESS] Training data filtered for all categories.")

# Reorganise Test Dataset
def reorganise_test_csv():
    """Rebuild ``organised_test_path`` with one sub-directory per class.

    Reads the organised test CSV (columns ``ClassId`` and ``Path`` are used),
    wipes any existing output directory, then copies each listed image from
    ``original_test_path`` into a folder named after its class id. Only the
    base name of ``Path`` is used to locate the source image. Missing source
    files are reported with a warning and skipped.
    """
    print("[INFO] Reorganising test dataset...")
    labels = pd.read_csv(organised_test_csv_path)

    # Start from a clean output tree so stale files never survive a rerun.
    if os.path.exists(organised_test_path):
        shutil.rmtree(organised_test_path)
    os.makedirs(organised_test_path, exist_ok=True)

    for class_id in sorted(labels['ClassId'].unique()):
        class_dir = os.path.join(organised_test_path, str(class_id))
        os.makedirs(class_dir, exist_ok=True)

        # Select the image paths belonging to this class, in CSV order.
        belongs_to_class = labels['ClassId'] == class_id
        for listed_path in labels.loc[belongs_to_class, 'Path']:
            file_name = os.path.basename(listed_path)
            src_path = os.path.join(original_test_path, file_name)
            if not os.path.exists(src_path):
                print(f"[WARNING] Missing file: {src_path}")
                continue
            shutil.copy(src_path, os.path.join(class_dir, file_name))

    print(f"[SUCCESS] Test dataset reorganised and saved to {organised_test_path}.")

# Sanity Check for Class Alignment
def verify_class_directories():
    """Check that training and test trees contain the same class folders.

    Compares the sorted sub-directory names of ``filtered_train_path`` and
    ``organised_test_path``.

    Raises:
        ValueError: If the two lists of class directory names differ.
    """
    print("[INFO] Verifying class directories...")

    def _class_dirs(root):
        # Sorted names of the immediate sub-directories of *root*.
        return sorted(
            entry for entry in os.listdir(root)
            if os.path.isdir(os.path.join(root, entry))
        )

    train_classes = _class_dirs(filtered_train_path)
    test_classes = _class_dirs(organised_test_path)

    if train_classes == test_classes:
        print("[SUCCESS] Training and test class directories are aligned.")
        return

    raise ValueError(f"[ERROR] Class mismatch! Training classes: {train_classes}, Test classes: {test_classes}")

# Initialise Data Generators
def initialise_data_generators():
    """Create the training, validation and test directory iterators.

    Training and validation share one generator that rescales pixels by
    1/255 and reserves 20% of ``filtered_train_path`` for validation. The
    test iterator reads ``organised_test_path`` with shuffling disabled.

    Returns:
        A ``(train, validation, test)`` tuple of directory iterators.
    """
    print("[INFO] Initialising data generators...")

    # Options common to all three flows.
    flow_options = {
        "target_size": (224, 224),
        "batch_size": 64,
        "class_mode": "categorical",
    }

    train_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)
    test_datagen = ImageDataGenerator(rescale=1.0 / 255)

    training_flow = train_datagen.flow_from_directory(
        filtered_train_path, subset="training", **flow_options
    )
    validation_flow = train_datagen.flow_from_directory(
        filtered_train_path, subset="validation", **flow_options
    )
    # shuffle=False keeps the test batches in a fixed order.
    test_flow = test_datagen.flow_from_directory(
        organised_test_path, shuffle=False, **flow_options
    )
    return (training_flow, validation_flow, test_flow)

# Build the Model
def build_model(num_classes):
    """Build and compile a VGG16-based classifier with a frozen backbone.

    A VGG16 network with ImageNet weights and no top layers is used for
    224x224x3 inputs. Its output is flattened and fed through Dense layers
    of 128 and 64 ReLU units, then a softmax layer of ``num_classes`` units.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        The model, compiled with Adam (learning rate 1e-4), categorical
        cross-entropy loss and the accuracy metric.
    """
    print("[INFO] Building the model...")

    backbone = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
    # Freeze the backbone so only the new head is trainable.
    backbone.trainable = False

    features = Flatten()(backbone.output)
    for units in (128, 64):
        features = Dense(units, activation="relu")(features)
    class_probabilities = Dense(num_classes, activation="softmax")(features)

    classifier = Model(inputs=backbone.input, outputs=class_probabilities)
    classifier.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier

# Main Execution
if __name__ == "__main__":
    # Stage 1: fetch the CSV and lay out the train/test directory trees.
    setup_test_csv()
    save_organised_test_csv()
    filter_train_data()
    reorganise_test_csv()
    verify_class_directories()

    # Stage 2: data iterators and a model sized to the discovered classes.
    train_gen, val_gen, test_gen = initialise_data_generators()
    model = build_model(len(train_gen.class_indices))

    # Stage 3: train, stopping early and lowering the learning rate when
    # the validation loss stops improving.
    early_stopping = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
    lr_on_plateau = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2, min_lr=1e-6)
    model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=15,
        callbacks=[early_stopping, lr_on_plateau],
    )

    # Stage 4: evaluate on the test iterator and save the model.
    model.evaluate(test_gen)
    model.save("gtsrb_all_categories_model.h5")
    print("[SUCCESS] Model saved as 'gtsrb_all_categories_model.h5'.")


[INFO] Mounting Google Drive...
Mounted at /content/drive
[INFO] Google Drive mounted successfully.
[SUCCESS] Test.csv copied to /content/ML_CSV_Files/Test.csv.
[INFO] Saving organised test CSV...
[SUCCESS] Organised test CSV saved to /content/ML_CSV_Files/organised_test.csv.
[INFO] Filtering training data for all categories...
[INFO] Checking for Kaggle dataset...
[INFO] Kaggle dataset not found. Downloading...
Downloading from https://www.kaggle.com/api/v1/datasets/download/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign?dataset_version_number=1...


100%|██████████| 612M/612M [00:03<00:00, 184MB/s]

Extracting files...





[SUCCESS] Kaggle dataset downloaded successfully.
[SUCCESS] Training data filtered for all categories.
[INFO] Reorganising test dataset...
[SUCCESS] Test dataset reorganised and saved to /content/Test_Organised.
[INFO] Verifying class directories...
[SUCCESS] Training and test class directories are aligned.
[INFO] Initialising data generators...
Found 31368 images belonging to 43 classes.
Found 7841 images belonging to 43 classes.
Found 12630 images belonging to 43 classes.
[INFO] Building the model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/15


  self._warn_if_super_not_called()


[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 111ms/step - accuracy: 0.3399 - loss: 2.6725 - val_accuracy: 0.5541 - val_loss: 1.7337 - learning_rate: 1.0000e-04
Epoch 2/15
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 92ms/step - accuracy: 0.7003 - loss: 1.2531 - val_accuracy: 0.6215 - val_loss: 1.3596 - learning_rate: 1.0000e-04
Epoch 3/15
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 93ms/step - accuracy: 0.8045 - loss: 0.8441 - val_accuracy: 0.6599 - val_loss: 1.2373 - learning_rate: 1.0000e-04
Epoch 4/15
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 92ms/step - accuracy: 0.8553 - loss: 0.6448 - val_accuracy: 0.6785 - val_loss: 1.1281 - learning_rate: 1.0000e-04
Epoch 5/15
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 92ms/step - accuracy: 0.8850 - loss: 0.5251 - val_accuracy: 0.6910 - val_loss: 1.0962 - learning_rate: 1.0000e-04
Epoch 6/15
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━



[SUCCESS] Model saved as 'gtsrb_all_categories_model.h5'.
