In [3]:

# Kaggle part
!pip install kaggle
from google.colab import files
uploaded = files.upload() # pls upload the kaggle.json file
import os
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c ds3-datathon-traffic-signs
!unzip -q ds3-datathon-traffic-signs.zip -d /content/traffic_signs
!ls /content/traffic_signs

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import os
# Citations: Towards Data Science, Analytics Vidhya, and the data-augmentation links attached in the slide deck for reference.
# Training-data paths and the function that loads the training data
# Root folder of the extracted competition archive; the training CSV sits directly inside it.
image_base_path = '/content/traffic_signs/traffic_signs/'
csv_path = os.path.join(image_base_path, 'traffic_train.csv')
def load_data(csv_path, base_path, img_size=(30, 30)):
    """Load the labelled training images listed in a CSV file.

    Args:
        csv_path: Path to a CSV with a 'Path' column (image location relative
            to ``base_path``) and a 'ClassId' column (the label).
        base_path: Directory that the 'Path' entries are relative to.
        img_size: ``(width, height)`` passed to ``Image.resize`` for every image.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. Images that cannot be read
        are reported on stdout and skipped together with their label, so the
        two arrays stay aligned.
    """
    data = []
    labels = []
    df = pd.read_csv(csv_path)
    # Only two columns are needed; zipping them avoids building a Series per row.
    for rel_path, class_id in zip(df['Path'], df['ClassId']):
        image_path = os.path.join(base_path, rel_path)
        try:
            # The context manager closes the file handle promptly.
            # convert('RGB') forces three channels so greyscale, palette or
            # RGBA files cannot produce a ragged array when stacked below.
            with Image.open(image_path) as image:
                pixels = np.array(image.convert('RGB').resize(img_size))
        except IOError as e:
            print(f"Error loading image: {image_path} - {e}")
            continue
        data.append(pixels)
        labels.append(class_id)
    data = np.array(data)
    labels = np.array(labels)
    return data, labels


# Read every training image, then hold out 20% of the samples for validation.
data, labels = load_data(csv_path, image_base_path)

X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

# One-hot encode both label vectors over 43 classes.
y_train, y_test = (to_categorical(split_labels, 43) for split_labels in (y_train, y_test))

# Data augmentation: small random rotations, zooms, shifts and shears.
# Horizontal flipping is left switched off.
augmentation_options = {
    'rotation_range': 10,
    'zoom_range': 0.15,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.15,
    'horizontal_flip': False,
    'fill_mode': "nearest",
}
datagen = ImageDataGenerator(**augmentation_options)

# CNN model: two convolutional blocks followed by a dense classifier head.
model = Sequential([
    # First convolutional block: two 5x5 convolutions with 32 filters each.
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=X_train.shape[1:]),
    BatchNormalization(),
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # Second convolutional block: two 3x3 convolutions with 64 filters each.
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # Classifier head: flatten, one hidden dense layer, 43-way softmax.
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(rate=0.5),
    Dense(43, activation='softmax'),
])

# Compile the model.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras releases.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Training schedule
epochs = 20
batch_size = 32

# Callbacks to limit overfitting: both watch the validation loss. One lowers
# the learning rate when it plateaus, the other stops training and restores
# the best weights.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train on augmented batches; validate on the untouched hold-out split.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
history = model.fit(
    train_batches,
    steps_per_epoch=len(X_train) // batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, reduce_lr],
)

# Save the trained model to disk.
model.save("improved_traffic_classifier.h5")




Saving kaggle.json to kaggle (2).json
ds3-datathon-traffic-signs.zip: Skipping, found more recently modified local copy (use --force to force download)
replace /content/traffic_signs/traffic_signs/Meta.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
traffic_signs




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


In [4]:
# Function to load test data
def load_test_data(csv_path, base_path, img_size=(30, 30)):
    """Load the unlabelled test images listed in a CSV file.

    Args:
        csv_path: Path to a CSV with a 'Path' column (image location relative
            to ``base_path``).
        base_path: Directory that the 'Path' entries are relative to.
        img_size: ``(width, height)`` passed to ``Image.resize`` for every image.

    Returns:
        A ``(data, image_paths)`` tuple: a NumPy array of the resized images
        and the list of full image paths, both in CSV row order.

    Raises:
        OSError: If an image cannot be opened. Nothing is skipped here, so
            every output row keeps the position of its CSV row.
    """
    data = []
    image_paths = []
    df = pd.read_csv(csv_path)
    for rel_path in df['Path']:
        image_path = os.path.join(base_path, str(rel_path))
        # The context manager closes the file handle promptly.
        # convert('RGB') forces three channels so greyscale, palette or RGBA
        # files cannot produce a ragged array when stacked below.
        with Image.open(image_path) as image:
            data.append(np.array(image.convert('RGB').resize(img_size)))
        image_paths.append(image_path)
    return np.array(data), image_paths

# Locations of the test images and the CSV that lists them.
test_image_base_path = '/content/traffic_signs/traffic_signs/'
test_csv_path = test_image_base_path + 'traffic_test.csv'

test_data, test_image_paths = load_test_data(test_csv_path, test_image_base_path)

# Predict class probabilities and keep the most likely class per image.
test_predictions = model.predict(test_data)
predicted_classes = test_predictions.argmax(axis=1)

# Submission table: sequential ids paired with the predicted class.
submission_df = pd.DataFrame(dict(
    Id=range(0, len(predicted_classes)),
    ClassId=predicted_classes,
))

# Save the submission to a CSV file
submission_file_path = '/content/traffic_signs/traffic_submission.csv'
submission_df.to_csv(submission_file_path, index=False)
print(f'Submission file saved to: {submission_file_path}')

# Download the submission file from the Colab runtime.
from google.colab import files
files.download(submission_file_path)


Submission file saved to: /content/traffic_signs/traffic_submission.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>