In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read while downloading.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries;
# each entry is downloaded and unpacked into KAGGLE_INPUT_PATH/<directory>.
# NOTE(review): the URL is a signed link whose query string carries
# X-Goog-Date=20241011T184054Z and X-Goog-Expires=259200, so presumably it stops
# working after that window and has to be regenerated -- confirm before re-running.
DATA_SOURCE_MAPPING = 'gtsrb-german-traffic-sign:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F82373%2F191501%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20241011%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20241011T184054Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Da92505262752b4d718506025784223d97e2a5c4f7067b1dbb12a0fa0bbf1f28c5d5aa6b05ecfcd48ff7c4c8b85245746157982d08998207b5591a9e6ede5e6ced353869aade0af0e048c6c02053636c7c365db52b77a8acf7a3e3108ccaef6734c50a5487319a5119e74f50a5e04ba88dc62d780c81f29395e988e98052784d78693276024ca87c2c7c5021b8b983a0716ac4b6b37a36e6f3bc81efe18e21c7f599c5e726f7c5c6f1f830669bf6ac1620968d67214d76f3d8ac8075f2efe4c4ee375a95404c92a9d51ef981e265e6ada85a6cdc25aadbf403e552a16f3ccb659be9e64caaf90e10222991029468d4f388a501d3cbaa32e03ef20ae43b44a53ff'

# Absolute locations that mirror Kaggle's notebook layout.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible cells.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every data source listed in DATA_SOURCE_MAPPING and unpack it under
# KAGGLE_INPUT_PATH/<directory>. A source that fails to download or extract is
# reported and skipped so the remaining sources are still processed.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a stray colon in the URL part cannot break unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header lookup yields None when the server sends no Content-Length;
            # in that case only the running byte count is shown, without a bar.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk before extracting: the tar branch reopens
            # the temp file by name and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here would
                # overwrite the module and break every later tar source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading gtsrb-german-traffic-sign, 641568792 bytes compressed
Downloaded and uncompressed: gtsrb-german-traffic-sign
Data source import complete.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import os
import pathlib
import random as rn
from sklearn.model_selection import train_test_split
import tensorflow_hub as hub
from tf_keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from tf_keras.utils import to_categorical
from tf_keras.models import Sequential, load_model
from tf_keras.applications import VGG19
from tf_keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from sklearn.metrics import accuracy_score
# Report how many GPU devices TensorFlow can see in this runtime.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [None]:
# Dataset locations (relative paths starting at the parent directory) and the
# height/width used as the image target size further down.
data_dir = '../input/gtsrb-german-traffic-sign/'
train_path = data_dir + 'Train/'
test_path = data_dir + 'Test/'
height = width = 50

In [None]:
# Human-readable sign names keyed by integer class id (0..42).
# The position of each name in the tuple below is its class id.
classes = dict(enumerate((
    # 0-8: speed limits
    'Speed limit (20km/h)',
    'Speed limit (30km/h)',
    'Speed limit (50km/h)',
    'Speed limit (60km/h)',
    'Speed limit (70km/h)',
    'Speed limit (80km/h)',
    'End of speed limit (80km/h)',
    'Speed limit (100km/h)',
    'Speed limit (120km/h)',
    # 9-17
    'No passing',
    'No passing veh over 3.5 tons',
    'Right-of-way at intersection',
    'Priority road',
    'Yield',
    'Stop',
    'No vehicles',
    'Veh > 3.5 tons prohibited',
    'No entry',
    # 18-31
    'General caution',
    'Dangerous curve left',
    'Dangerous curve right',
    'Double curve',
    'Bumpy road',
    'Slippery road',
    'Road narrows on the right',
    'Road work',
    'Traffic signals',
    'Pedestrians',
    'Children crossing',
    'Bicycles crossing',
    'Beware of ice/snow',
    'Wild animals crossing',
    # 32-42
    'End speed + passing limits',
    'Turn right ahead',
    'Turn left ahead',
    'Ahead only',
    'Go straight or right',
    'Go straight or left',
    'Keep right',
    'Keep left',
    'Roundabout mandatory',
    'End of no passing',
    'End no passing veh > 3.5 tons',
)))

In [None]:
batch_size = 4
seed = 42

# Options shared by both directory iterators, so the two differ only in `subset`.
flow_options = dict(
    target_size=(height, width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    seed=seed,
    color_mode='rgb',
    interpolation='hamming',
)

# Training portion of Train/ (pixel values scaled by 1/255, 20% reserved for validation).
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_dataset = train_datagen.flow_from_directory(train_path, subset='training', **flow_options)

# NOTE: despite its name, test_dataset is the validation subset of Train/
# (it reads from train_path); the Test/ folder is not used here.
test_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_dataset = test_datagen.flow_from_directory(train_path, subset='validation', **flow_options)

Found 31368 images belonging to 43 classes.
Found 7841 images belonging to 43 classes.


In [None]:
# Baseline CNN: two convolutional stages (conv-conv-pool-batchnorm) followed by a
# 512-unit dense layer and a 43-way softmax output.
model = keras.models.Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape` to the
    # first layer of a Sequential model makes Keras emit a UserWarning.
    keras.Input(shape=(height, width, 3)),
    keras.layers.Conv2D(filters=16, kernel_size=(5,5), activation='relu'),
    keras.layers.Conv2D(filters=32, kernel_size=(5,5), activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(axis=-1),

    keras.layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    keras.layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(axis=-1),
    keras.layers.Dropout(rate=0.25),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate=0.25),

    # One output per traffic-sign class (the directory iterators report 43 classes).
    keras.layers.Dense(43, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Print the layer-by-layer summary of the baseline CNN defined above.
model.summary()

In [None]:
# Training hyper-parameters for the baseline CNN.
alpha = 0.0001  # Adam learning rate
# NOTE(review): `epochs` is not referenced in the visible cells -- presumably meant
# for a later model.fit call; confirm.
epochs = 30
# Feed `alpha` to the optimizer instead of repeating the literal, so changing the
# learning rate in one place actually takes effect.
optim = keras.optimizers.Adam(learning_rate=alpha)
model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

# Load every image under Train/ into memory through train_datagen, hold out a
# fixed-size test set, then write three random train/validation splits of the
# remainder to disk as .npz files.
X_train_list = []
y_train_list = []

# No `subset` is passed, so the iterator covers all images in the directory
# (39209 in the recorded run); shuffle=False keeps a deterministic order.
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(height, width),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

# One full pass over the iterator: len(train_generator) batches.
for batch_idx in range(len(train_generator)):
    X_batch, y_batch = next(train_generator)
    X_train_list.append(X_batch)
    y_train_list.append(y_batch)

# Stack the batches along the sample axis.
X_train_full = np.concatenate(X_train_list, axis=0)
y_train_full = np.concatenate(y_train_list, axis=0)


split_data_dir = 'gtsrb_splits'
os.makedirs(split_data_dir, exist_ok=True)

## Hold out exactly 7841 test samples (the size of the 20% validation subset
## reported by the directory iterator). The count is passed as an int, which
## train_test_split treats as an absolute number of samples; a float ratio
## built from a hard-coded dataset size depends on floating-point rounding.
n_test_samples = 7841
split_ratio = n_test_samples / len(X_train_full)  # kept for reference, approximately 0.19999

## Split the training data. From here on X_train_full / y_train_full hold only
## the remaining (non-test) samples.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_train_full, y_train_full, test_size=n_test_samples, random_state=42)

test_file_path = os.path.join(split_data_dir, 'test_data.npz')
np.savez(test_file_path, X_test=X_test, y_test=y_test)
print(f'Saved derived test set to {test_file_path}')

# Gen 3 random splits for training and validation from the remaining 80% training data.
# Each split uses a different random_state, so the validation sets may overlap.
n_splits = 3
for i in range(n_splits):
    X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
        X_train_full, y_train_full, test_size=0.2, random_state=42+i)

    # Save each split
    split_file_path = os.path.join(split_data_dir, f'split_{i}.npz')
    np.savez(split_file_path, X_train=X_train_split, X_val=X_val_split, y_train=y_train_split, y_val=y_val_split)
    print(f'Saved split {i} to {split_file_path}')


Found 39209 images belonging to 43 classes.
Saved derived test set to gtsrb_splits/test_data.npz
Saved split 0 to gtsrb_splits/split_0.npz
Saved split 1 to gtsrb_splits/split_1.npz
Saved split 2 to gtsrb_splits/split_2.npz


In [None]:
# Report how many samples ended up on each side of the train/test split.
n_train_rows, n_test_rows = X_train_full.shape[0], X_test.shape[0]
print(f"Size of training set: {n_train_rows}")
print(f"Size of test set: {n_test_rows}")

Size of training set: 31368
Size of test set: 7841


In [None]:
import tf_keras as k
from tf_keras.applications import VGG16
from tf_keras.models import Sequential
from tf_keras.layers import Dense, Dropout, Flatten
from tf_keras.optimizers import Adam
from tf_keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import os

# Directory for the saved splits
split_data_dir = 'gtsrb_splits'
n_splits = 3  # Number of splits

# Fine-tune one ImageNet-pretrained VGG16 classifier per saved train/validation split.
for i in range(n_splits):
    # Release graph/layer state left over from the previous split's model so
    # memory does not pile up while several large models are built in a loop.
    k.backend.clear_session()

    # Load the split data
    split_file_path = os.path.join(split_data_dir, f'split_{i}.npz')
    with np.load(split_file_path) as data:
        X_train_split = data['X_train']
        y_train_split = data['y_train']
        X_val_split = data['X_val']
        y_val_split = data['y_val']

    # Load the VGG16 base model; the input shape is taken from the loaded images
    # so it always matches the data that was saved.
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=X_train_split.shape[1:])
    # The architecture is identical for every split, so print it only once.
    if i == 0:
        base_model.summary()

    # Keras layers are trainable by default, so "unfreezing the last few layers"
    # only has an effect if the earlier layers are frozen first. Freeze everything
    # except the last five layers and fine-tune only those.
    for layer in base_model.layers[:-5]:
        layer.trainable = False
    for layer in base_model.layers[-5:]:
        layer.trainable = True

    # Add new layers on top of the base model
    model2 = Sequential()
    model2.add(base_model)
    model2.add(Flatten())
    model2.add(Dense(512, activation='relu'))
    model2.add(Dropout(0.5))
    model2.add(Dense(43, activation='softmax'))  # 43 classes in the GTSRB dataset

    # Recreate the optimizer for each model so no optimizer state is shared between splits
    alpha = 0.0001
    optim = Adam(learning_rate=alpha)

    # Compile the model
    model2.compile(optimizer=optim, loss='categorical_crossentropy', metrics=['accuracy'])

    # Stop when val_accuracy has not improved for 5 epochs, and keep the best
    # epoch (by val_accuracy) in a per-split checkpoint file.
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, verbose=1)
    checkpoint_filepath = f'best_model_split_{i}.keras'
    checkpoint = ModelCheckpoint(checkpoint_filepath, monitor='val_accuracy', verbose=1, save_best_only=True)

    # Train the model on the current split
    print(f'Training model {i+1} on split {i}...')
    model2.fit(
        X_train_split, y_train_split,
        epochs=10,
        validation_data=(X_val_split, y_val_split),
        batch_size=32,
        callbacks=[early_stopping, checkpoint]
    )

    # Save the model as it stands after the final epoch; the best-scoring epoch
    # is the one stored at checkpoint_filepath.
    model_save_path = f'vgg16model_split_{i}.keras'
    model2.save(model_save_path)
    print(f'Saved model {i+1} to {model_save_path}')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 50, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 50, 50, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 50, 50, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 25, 25, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 25, 25, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 25, 25, 128)      

In [None]:
# model2 is rebound on every pass of the training loop, so this summarizes only
# the model built for the last split.
model2.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 1, 1, 512)         14714688  
                                                                 
 flatten_2 (Flatten)         (None, 512)               0         
                                                                 
 dense_4 (Dense)             (None, 512)               262656    
                                                                 
 dropout_2 (Dropout)         (None, 512)               0         
                                                                 
 dense_5 (Dense)             (None, 43)                22059     
                                                                 
Total params: 14999403 (57.22 MB)
Trainable params: 14999403 (57.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
