In [101]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2

# import the dataset files
from google.colab import drive
drive.mount('/content/gdrive/')

# change directory to the Train folder
os.chdir('/content/gdrive/MyDrive/Colab_Notebooks/Project_Engineering_Success/German_Traffic_Signs_Image_Classification/Train')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [102]:
# used to read images from files
from PIL import Image


num_classes = 43
data = []
labels = []
curr_path = os.getcwd()

# access each of the 43 subdirectories
for i in range(num_classes):
  path = os.path.join(curr_path, str(i))
  images = os.listdir(path)
  # access each picture within each subdirectory
  for pic in images:
    try:
      image = Image.open(path + "/" + pic)
      resize_image = image.resize((30, 30))
      numpy_image = np.array(resize_image)
      # grayscale_image = np.reshape(numpy_image[:,:,1], (900, 1))
      # grayscale_image = np.reshape(numpy_image[:,:,1], (30, 30, 1))
      # flattened_image = grayscale_image.flatten()
      # data = np.append(data, flattened_image, axis=0)
      data.append(numpy_image)
      labels.append(i)
    except Exception as e:
      print(e)
  print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42


In [103]:
# convert data and labels arrays to numpy arrays
data = np.array(data)
labels = np.array(labels)

In [104]:
# save the data and labels arrays
np.save('/content/gdrive/MyDrive/Colab_Notebooks/Project_Engineering_Success/German_Traffic_Signs_Image_Classification/training/data.npy', data)
np.save('/content/gdrive/MyDrive/Colab_Notebooks/Project_Engineering_Success/German_Traffic_Signs_Image_Classification/training/labels.npy', labels)

In [105]:
# load the data and labels array
data = np.load('/content/gdrive/MyDrive/Colab_Notebooks/Project_Engineering_Success/German_Traffic_Signs_Image_Classification/training/data.npy')
labels = np.load('/content/gdrive/MyDrive/Colab_Notebooks/Project_Engineering_Success/German_Traffic_Signs_Image_Classification/training/labels.npy')

In [106]:
print(data.shape, labels.shape)
print(data.size, labels.size)

(39519, 30, 30, 3) (39519,)
106701300 39519


In [107]:
# used to split the training data into train and test files
from sklearn.model_selection import train_test_split

# use 20% of files for testing, and 80% for training
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)

In [108]:
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

x_train shape: (31615, 30, 30, 3)
y_train shape: (31615,)
x_test shape: (7904, 30, 30, 3)
y_test shape: (7904,)


In [109]:
from keras.utils import to_categorical

# Convert the image arrays to binary matrices (one hot encoding)
  # One hot encoding associates categorical data with a number instead
  # In this case, we are associating the name of the sign with a number from 1-43 (since we have 43 classes)
y_train_enc = to_categorical(y_train, 43)
y_test_enc = to_categorical(y_test, 43)

In [110]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout

# model = Sequential([
#     Dense(units=16, input_shape=x_train.shape[1:], activation='relu'),
#     Dense(units=32, activation='relu'),
#     # use 43 units because there are 43 classes in the German Traffic Signals Dataset
#     Dense(units=43, activation='softmax')
# ])

model = Sequential([
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=x_train.shape[1:]),
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.2),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
    Dense(units=900, activation='relu'),
    Dropout(rate=0.5),
    # use 43 units because there are 43 classes in the German Traffic Signals Dataset
    Dense(units=43, activation='softmax')
])

In [111]:
# use adam optimizer because it is faster than sgd and because we are using a much larger dataset than the mnist digit dataset
# adam optimizer uses more compute resources than sgd, but it yields faster results and works better with large databases
model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 26, 26, 32)        2432      
                                                                 
 conv2d_29 (Conv2D)          (None, 22, 22, 32)        25632     
                                                                 
 max_pooling2d_14 (MaxPooli  (None, 11, 11, 32)        0         
 ng2D)                                                           
                                                                 
 dropout_21 (Dropout)        (None, 11, 11, 32)        0         
                                                                 
 conv2d_30 (Conv2D)          (None, 9, 9, 64)          18496     
                                                                 
 conv2d_31 (Conv2D)          (None, 7, 7, 64)          36928     
                                                      

In [113]:
model.fit(
    x_train,
    y_train_enc,
    epochs=20
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x788940875840>