In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow import keras
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Directory that holds the dataset's CSV index files and image folders
dataset_path = 'gtsrb-dataset'

# Resolve the three CSV index files relative to the dataset directory
train_csv = os.path.join(dataset_path, 'Train.csv')
test_csv = os.path.join(dataset_path, 'Test_Images.csv')
meta_csv = os.path.join(dataset_path, 'Meta.csv')

# Read each index into its own DataFrame
train_data = pd.read_csv(train_csv)
test_data = pd.read_csv(test_csv)
meta_data = pd.read_csv(meta_csv)

# Preprocess a single image
def preprocess_image(image_path, class_id, target_size=(30, 30)):
    """Load an image file and return it as a normalized RGB array.

    Args:
        image_path: Path of the image file to open.
        class_id: Label that belongs to the image; passed through unchanged.
        target_size: ``(width, height)`` the image is resized to.

    Returns:
        A tuple ``(image_array, class_id)``. ``image_array`` is a float array
        of shape ``(height, width, 3)`` with values scaled to ``[0, 1]``.
    """
    # The context manager releases the file handle right away instead of
    # leaving it to the garbage collector while thousands of images load.
    with Image.open(image_path) as image:
        # Convert first so that palette or grayscale sources are resampled on
        # real colour values; convert() always yields three channels, so no
        # separate grayscale stacking step is needed afterwards.
        rgb_image = image.convert("RGB").resize(target_size)
    image_array = np.array(rgb_image)
    # Scale 8-bit channel values into [0, 1]
    return image_array / 255.0, class_id

def _load_split(frame):
    """Preprocess every image listed in *frame*.

    Args:
        frame: DataFrame with a ``Path`` column (image path relative to
            ``dataset_path``) and a ``ClassId`` column.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays, one entry per row.
    """
    images = []
    labels = []
    # Zipping the two columns avoids the per-row Series that iterrows() builds
    for relative_path, class_id in zip(frame['Path'], frame['ClassId']):
        image, label = preprocess_image(
            os.path.join(dataset_path, relative_path), class_id)
        images.append(image)
        labels.append(label)
    # float32 halves the memory of the stacked image array compared with the
    # float64 produced by the division in preprocess_image.
    return np.asarray(images, dtype=np.float32), np.asarray(labels)


# Load and preprocess training images
train_images, train_labels = _load_split(train_data)

# Perform one-hot encoding on the training labels
label_binarizer = LabelBinarizer()
train_labels = label_binarizer.fit_transform(train_labels)

# Split the training data into training and validation sets
train_images, valid_images, train_labels, valid_labels = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=123)

# Load and preprocess test images; test_labels stays as raw class ids here
test_images, test_labels = _load_split(test_data)

# Derive the network's input and output sizes from the prepared data so the
# model always matches the image shape and the width of the one-hot labels.
input_shape = train_images.shape[1:]
num_classes = train_labels.shape[1]
batch_size = 32

# Create the convolutional neural network model
model = keras.models.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax')
])

# Define learning rate schedule
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.9
)

# Compile the model. The schedule has to be handed to the optimizer; with
# optimizer='adam' the schedule above would be created but never applied.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Define data augmentation configuration
data_augmentation = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False,
    fill_mode='nearest'
)

# Define early stopping. restore_best_weights rolls the model back to the
# epoch with the lowest val_loss instead of keeping the weights from the
# last (worse) epoch that ran before training stopped.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with data augmentation and learning rate scheduling
model.fit(data_augmentation.flow(train_images, train_labels, batch_size=batch_size),
          epochs=50,
          steps_per_epoch=len(train_images) // batch_size,
          validation_data=(valid_images, valid_labels),
          callbacks=[early_stopping],
          verbose=1)

# Keep the raw class ids for reporting; recovering them later with argmax on
# the one-hot matrix would turn any class unseen during fit into class 0.
true_labels = np.asarray(test_labels)

# Convert the test labels to one-hot format
test_labels = label_binarizer.transform(true_labels)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_images, test_labels)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')

# Make predictions on the test images
predictions = model.predict(test_images)

# Convert predictions to class labels. argmax yields a column index of the
# one-hot encoding, so map it back through the binarizer's class list.
predicted_labels = label_binarizer.classes_[np.argmax(predictions, axis=1)]

# Display the classified test images
fig, axes = plt.subplots(6, 4, figsize=(10, 10))
axes = axes.ravel()

# Hide every axis up front so unused panels stay blank
for ax in axes:
    ax.axis('off')

# zip stops at the shorter input, so fewer test images than panels is fine
for ax, image, true_label, predicted_label in zip(
        axes, test_images, true_labels, predicted_labels):
    ax.imshow(image)
    ax.set_title(f"True Class: {true_label}\nPredicted Class: {predicted_label}")

plt.subplots_adjust(hspace=1, wspace=0.5)
plt.show()

# Compute the confusion matrix; passing labels fixes the row/column order to
# the classes the model was trained on.
cm = confusion_matrix(true_labels, predicted_labels,
                      labels=label_binarizer.classes_)

print("Confusion Matrix:")
print(cm)


Epoch 1/50


2023-07-20 14:53:35.211239: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/50
Epoch 3/50
Epoch 4/50
102/980 [==>...........................] - ETA: 1:02 - loss: 0.4328 - accuracy: 0.8664