In [None]:
# William Goggins - S00248401

import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Paths
# Test.csv as stored on Google Drive; setup_test_csv() copies it from here.
gdrive_csv_path = "/content/drive/MyDrive/Colab_Files/Test.csv"
# Working copy of Test.csv; read by save_organised_test_csv().
content_csv_path = "/content/ML_CSV_Files/Test.csv"
# Test.csv restricted to ClassId 0-10; written by save_organised_test_csv()
# and read back by reorganise_test_csv().
organised_test_csv_path = "/content/ML_CSV_Files/organised_test.csv"
# Per-class training folders of the downloaded dataset. The parent directory
# of this path is what setup_kaggle_dataset() checks for.
original_train_path = "/root/.cache/kagglehub/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign/versions/1/Train"
# Copy of the training folders for classes 0-10 only; fed to the train and
# validation generators.
filtered_train_path = "/content/Train_11_Categories"
# Folder holding the test images of the downloaded dataset.
original_test_path = "/root/.cache/kagglehub/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign/versions/1/Test"
# Test images regrouped into one sub-folder per class; wiped and rebuilt by
# reorganise_test_csv() and fed to the test generator.
organised_test_path = "/content/Test_Organised"

# Setup and Check Google Drive for Test.csv
def setup_test_csv():
    """Mount Google Drive and copy Test.csv into the working CSV folder.

    The file is copied from ``gdrive_csv_path`` to ``content_csv_path``; the
    destination directory is created if it does not exist yet.

    Raises:
        FileNotFoundError: If nothing exists at ``gdrive_csv_path`` after the
            drive has been mounted.
    """
    print("[INFO] Mounting Google Drive...")
    # Imported inside the function rather than at module top — presumably
    # because google.colab is only available on a Colab runtime.
    from google.colab import drive
    drive.mount('/content/drive')

    print("[INFO] Google Drive mounted successfully.")

    # The destination folder is created before the source is checked, so it
    # exists even when the copy below is never reached.
    target_dir = os.path.dirname(content_csv_path)
    os.makedirs(target_dir, exist_ok=True)

    if not os.path.exists(gdrive_csv_path):
        raise FileNotFoundError(f"[ERROR] Test.csv not found at {gdrive_csv_path}. Please ensure it exists.")

    shutil.copy(gdrive_csv_path, content_csv_path)
    print(f"[SUCCESS] Test.csv copied to {content_csv_path}.")

# Save Organised Test CSV
def save_organised_test_csv():
    """Write a copy of Test.csv containing only rows with ClassId 0 to 10.

    Reads ``content_csv_path`` and writes the filtered rows, without the
    DataFrame index, to ``organised_test_csv_path``.
    """
    print("[INFO] Saving organised test CSV...")

    wanted_ids = list(range(11))  # ClassId values 0..10 inclusive
    all_rows = pd.read_csv(content_csv_path)

    # Boolean mask: True for every row whose ClassId is one of the wanted ids.
    keep_mask = all_rows['ClassId'].isin(wanted_ids)
    all_rows[keep_mask].to_csv(organised_test_csv_path, index=False)

    print(f"[SUCCESS] Organised test CSV saved to {organised_test_csv_path}.")

# Reorganise Training Data for Classes 0–10
def setup_kaggle_dataset():
    """Ensure the GTSRB dataset is available, downloading it if necessary.

    The dataset is considered present when the parent directory of
    ``original_train_path`` exists. Otherwise it is fetched with
    ``kagglehub.dataset_download``.

    After a download the expected training folder is re-checked. If it is
    still missing (for example because kagglehub stored the data somewhere
    other than the hard-coded cache path), a warning naming the actual
    download location is printed so the mismatch is visible immediately
    instead of surfacing later as "class not found" warnings.
    """
    print("[INFO] Checking for Kaggle dataset...")
    dataset_path = os.path.dirname(original_train_path)

    if os.path.exists(dataset_path):
        print("[INFO] Kaggle dataset already exists.")
        return

    print("[INFO] Kaggle dataset not found. Downloading...")
    # Imported lazily so kagglehub is only required when a download is needed.
    import kagglehub
    downloaded_path = kagglehub.dataset_download("meowmeowmeowmeowmeow/gtsrb-german-traffic-sign")
    print("[SUCCESS] Kaggle dataset downloaded successfully.")

    # The rest of the pipeline reads from the hard-coded original_train_path,
    # not from the path kagglehub returned, so confirm the two line up.
    if not os.path.isdir(original_train_path):
        print(
            f"[WARNING] Expected training data at {original_train_path}, "
            f"but the dataset was downloaded to {downloaded_path}."
        )

def filter_train_data():
    """Copy the training folders for classes 0 to 10 into ``filtered_train_path``.

    Calls ``setup_kaggle_dataset()`` first. A class folder that already exists
    at the destination is left untouched; a class folder missing from the
    source only produces a warning.
    """
    print("[INFO] Filtering training data for classes 0–10...")

    # Make sure the source dataset has been downloaded before copying from it.
    setup_kaggle_dataset()
    os.makedirs(filtered_train_path, exist_ok=True)

    for class_number in range(11):
        folder_name = str(class_number)
        source_dir = os.path.join(original_train_path, folder_name)

        if not os.path.exists(source_dir):
            print(f"[WARNING] Class {folder_name} not found in the training dataset.")
            continue

        # Skip classes copied by an earlier run; copytree would otherwise
        # fail on an existing destination.
        target_dir = os.path.join(filtered_train_path, folder_name)
        if not os.path.exists(target_dir):
            shutil.copytree(source_dir, target_dir)

    print("[SUCCESS] Training data filtered for classes 0–10.")

# Reorganise Test Dataset
def reorganise_test_csv():
    """Rebuild ``organised_test_path`` with one sub-folder per class 0 to 10.

    Rows are read from ``organised_test_csv_path``. For each row the image is
    looked up in ``original_test_path`` by the base name of its ``Path``
    column and copied into the folder named after its ``ClassId``. Missing
    source images are reported with a warning and skipped.
    """
    print("[INFO] Reorganising test dataset...")

    filtered_rows = pd.read_csv(organised_test_csv_path)

    # Start from an empty target so files from earlier runs cannot linger.
    if os.path.exists(organised_test_path):
        shutil.rmtree(organised_test_path)
    os.makedirs(organised_test_path, exist_ok=True)

    for class_number in range(11):
        # Every class gets a folder, even if no rows belong to it.
        target_dir = os.path.join(organised_test_path, str(class_number))
        os.makedirs(target_dir, exist_ok=True)

        is_this_class = filtered_rows['ClassId'] == class_number
        for listed_path in filtered_rows[is_this_class]['Path']:
            # Only the file name is used; any directory part of the CSV
            # entry is discarded.
            file_name = os.path.basename(listed_path)
            source_file = os.path.join(original_test_path, file_name)

            if not os.path.exists(source_file):
                print(f"[WARNING] Missing file: {source_file}")
                continue

            shutil.copy(source_file, os.path.join(target_dir, file_name))

    print(f"[SUCCESS] Test dataset reorganised and saved to {organised_test_path}.")

# Sanity Check for Class Alignment
def verify_class_directories():
    """Check that the training and test folders contain the same class names.

    Compares the sorted sub-directory names of ``filtered_train_path`` and
    ``organised_test_path``.

    Raises:
        ValueError: If the two sorted name lists differ.
    """
    print("[INFO] Verifying class directories...")

    def _class_folders(root):
        # Sub-directory names directly under root, sorted as strings.
        return sorted(
            entry for entry in os.listdir(root)
            if os.path.isdir(os.path.join(root, entry))
        )

    train_classes = _class_folders(filtered_train_path)
    test_classes = _class_folders(organised_test_path)

    print(f"[INFO] Training classes: {train_classes}")
    print(f"[INFO] Test classes: {test_classes}")

    if train_classes == test_classes:
        print("[SUCCESS] Training and test class directories are aligned.")
        return

    raise ValueError(f"[ERROR] Class mismatch! Training classes: {train_classes}, Test classes: {test_classes}")

# Initialise Data Generators
def initialise_data_generators():
    """Create the training, validation and test image generators.

    Training and validation batches both come from ``filtered_train_path``,
    split 80/20 via ``validation_split=0.2``. Test batches come from
    ``organised_test_path`` and are not shuffled. All generators resize
    images to 224x224, use a batch size of 64 and one-hot
    (``"categorical"``) labels.

    Images are preprocessed with ``vgg16.preprocess_input`` rather than a
    plain 1/255 rescale: the ImageNet-pretrained VGG16 weights used by
    ``build_model`` were trained on inputs in that format, and the backbone
    is frozen, so it cannot adapt to a different input scaling.

    Returns:
        tuple: ``(train_generator, validation_generator, test_generator)``.
    """
    print("[INFO] Initialising data generators...")
    # Local import keeps this block self-contained; the file's top-level
    # imports do not include the VGG16 preprocessing helper.
    from tensorflow.keras.applications.vgg16 import preprocess_input

    train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.2)
    test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    # Settings shared by all three generators, so they cannot drift apart.
    shared_flow_args = dict(
        target_size=(224, 224),
        batch_size=64,
        class_mode="categorical",
    )

    train_generator = train_datagen.flow_from_directory(
        filtered_train_path,
        subset="training",
        **shared_flow_args,
    )

    validation_generator = train_datagen.flow_from_directory(
        filtered_train_path,
        subset="validation",
        **shared_flow_args,
    )

    # shuffle=False keeps test batches in directory order.
    test_generator = test_datagen.flow_from_directory(
        organised_test_path,
        shuffle=False,
        **shared_flow_args,
    )

    return train_generator, validation_generator, test_generator

# Build the Model
def build_model():
    """Build and compile an 11-class classifier on a frozen VGG16 backbone.

    The backbone uses ImageNet weights without its top layers and takes
    224x224x3 inputs. Its output is flattened and passed through ReLU dense
    layers of 128 and 64 units, then an 11-unit softmax layer. The model is
    compiled with Adam (learning rate 0.0001), categorical cross-entropy
    loss and the accuracy metric.

    Returns:
        The compiled Keras ``Model``.
    """
    print("[INFO] Building the model...")
    backbone = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
    # Freeze the backbone so only the new dense head is trained.
    backbone.trainable = False

    features = Flatten()(backbone.output)
    for units in (128, 64):
        features = Dense(units, activation="relu")(features)
    class_probs = Dense(11, activation="softmax")(features)

    classifier = Model(inputs=backbone.input, outputs=class_probs)
    classifier.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return classifier

# Train the Model
def train_model(model, train_generator, validation_generator):
    """Fit ``model`` for up to 30 epochs, monitoring validation loss.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        train_generator: Training data passed as the first argument to ``fit``.
        validation_generator: Data passed to ``fit`` as ``validation_data``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    print("[INFO] Training the model...")

    # Stop once val_loss has not improved for 3 epochs, keeping the best weights.
    stop_early = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
    # After 2 epochs without val_loss improvement, scale the learning rate
    # by 0.2, never going below 1e-6.
    shrink_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2, min_lr=1e-6)

    return model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=30,
        callbacks=[stop_early, shrink_lr],
    )

# Evaluate the Model
def evaluate_model(model, test_generator):
    """Evaluate ``model`` on ``test_generator`` and print accuracy and loss.

    Args:
        model: Object whose ``evaluate`` method returns a ``(loss, accuracy)``
            pair.
        test_generator: Data passed unchanged to ``model.evaluate``.
    """
    print("[INFO] Evaluating the model...")
    scores = model.evaluate(test_generator)
    # evaluate() yields the loss first, then the compiled metric (accuracy).
    loss_value, accuracy_value = scores
    print(f"Test Accuracy: {accuracy_value:.2f}, Test Loss: {loss_value:.2f}")

# Main Execution
# Runs the full pipeline in order: data preparation, class-alignment check,
# model training, evaluation, and finally saving the trained model.
if __name__ == "__main__":
    setup_test_csv()                 # Step 1: Mount Drive and copy Test.csv into the workspace
    save_organised_test_csv()        # Step 2: Write the Test.csv subset for classes 0–10
    filter_train_data()              # Step 3: Prepare training data for classes 0–10
    reorganise_test_csv()            # Step 4: Reorganise test data for classes 0–10
    verify_class_directories()       # Step 5: Raise if train/test class folders differ

    train_gen, val_gen, test_gen = initialise_data_generators()  # Initialise data generators
    model = build_model()                                        # Build model
    train_model(model, train_gen, val_gen)                      # Train model (returned history is not kept)
    evaluate_model(model, test_gen)                             # Evaluate model

    # Relative path: the file is written to the current working directory.
    model.save("gtsrb_model.h5")
    print("[SUCCESS] Model saved as 'gtsrb_model.h5'.")


[INFO] Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[INFO] Google Drive mounted successfully.
[SUCCESS] Test.csv copied to /content/ML_CSV_Files/Test.csv.
[INFO] Saving organised test CSV...
[SUCCESS] Organised test CSV saved to /content/ML_CSV_Files/organised_test.csv.
[INFO] Filtering training data for classes 0–10...
[INFO] Checking for Kaggle dataset...
[INFO] Kaggle dataset not found. Downloading...
Downloading from https://www.kaggle.com/api/v1/datasets/download/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign?dataset_version_number=1...


100%|██████████| 612M/612M [00:29<00:00, 21.7MB/s]

Extracting files...





[SUCCESS] Kaggle dataset downloaded successfully.
[SUCCESS] Training data filtered for classes 0–10.
[INFO] Reorganising test dataset...
[SUCCESS] Test dataset reorganised and saved to /content/Test_Organised.
[INFO] Verifying class directories...
[INFO] Training classes: ['0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9']
[INFO] Test classes: ['0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9']
[SUCCESS] Training and test class directories are aligned.
[INFO] Initialising data generators...
Found 13344 images belonging to 11 classes.
Found 3336 images belonging to 11 classes.
Found 5460 images belonging to 11 classes.
[INFO] Building the model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
[INFO] Training the model...
Epoch 1/30


  self._warn_if_super_not_called()


[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 123ms/step - accuracy: 0.3785 - loss: 1.8502 - val_accuracy: 0.4910 - val_loss: 1.5099 - learning_rate: 1.0000e-04
Epoch 2/30
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 89ms/step - accuracy: 0.7062 - loss: 1.0014 - val_accuracy: 0.5830 - val_loss: 1.3156 - learning_rate: 1.0000e-04
Epoch 3/30
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 88ms/step - accuracy: 0.7934 - loss: 0.7546 - val_accuracy: 0.5917 - val_loss: 1.2618 - learning_rate: 1.0000e-04
Epoch 4/30
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 89ms/step - accuracy: 0.8450 - loss: 0.5789 - val_accuracy: 0.5995 - val_loss: 1.2306 - learning_rate: 1.0000e-04
Epoch 5/30
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 88ms/step - accuracy: 0.8744 - loss: 0.4840 - val_accuracy: 0.5980 - val_loss: 1.2310 - learning_rate: 1.0000e-04
Epoch 6/30
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━



Test Accuracy: 0.74, Test Loss: 0.86
[SUCCESS] Model saved as 'gtsrb_model.h5'.
