In [1]:
import tensorflow as tf
import keras as k
import os
import numpy as np

import matplotlib.pyplot as plt
import pathlib
from PIL import Image
import IPython.display as display

Using TensorFlow backend.


### Batch Size

Batch Size is an important parameter when training a network. It can influence speed and generalization, not necessarily in the same direction. There is no golden rule for the batch size but 32 is a commom number to start with.

In [2]:
BATCH_SIZE = 32
IMAGE_SIZE = 32

### Prepare to Load Images from folder *train_images*

In [5]:
# Converter a String do caminho da pasta para um Path
pathTrainImagesFolder = pathlib.Path('gtsrb/train_images/')

# Forma de retirar todos os ficheiros de uma determinada pasta
files = list(pathTrainImagesFolder.glob('00001/*'))

# Transformar para Array o nome de todas as pastas que estão dentro da pasta train_images 
arrayFolderNames = np.array(os.listdir(pathTrainImagesFolder))

### Auxiliary Functions 

In [6]:
def get_label(file_path):
  # convert the path to a list of path components
  parts = tf.strings.split(file_path, os.path.sep)
  # The second to last is the class-directory
  return parts[-2] == arrayFolderNames

def decode_img(img):
  # convert the compressed string to a 3D uint8 tensor
  img = tf.image.decode_png(img, channels=3)
  # Use `convert_image_dtype` to convert to floats in the [0,1] range.
  img = tf.image.convert_image_dtype(img, tf.float32)
  # resize the image to the desired size.
  return tf.image.resize(img, [32,32])

def get_bytes_and_label(file_path):
  label = get_label(file_path)
  # load the raw data from the file as a string
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label

### Loading images takes place in here

In [12]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

listset = tf.data.Dataset.list_files("gtsrb/train_images/*/*.png")
dataset = listset.map(get_bytes_and_label, num_parallel_calls = AUTOTUNE)
return dataset

<ParallelMapDataset shapes: ((32, 32, 3), (43,)), types: (tf.float32, tf.bool)>


### Information about image shape and size of training set

In [8]:
for image, label in dataset.take(1):
  print("Image shape: ", image.numpy().shape)
  
dataset_length = [i for i,_ in enumerate(dataset)][-1] + 1
print("Total images in dataset: ",dataset_length)


Image shape:  (32, 32, 3)


KeyboardInterrupt: 

### Preparing dataset

In [None]:
dataset = dataset.cache()
dataset = dataset.shuffle(buffer_size = dataset_length)
dataset = dataset.batch(batch_size=BATCH_SIZE)
dataset = dataset.prefetch(buffer_size=AUTOTUNE)
dataset = dataset.repeat()

### Loading and preparing test dataset

In [None]:
testset = tf.data.Dataset.list_files("gtsrb/test_images/*/*.png")
testset = testset.map(get_bytes_and_label, num_parallel_calls = AUTOTUNE)
testset = testset.batch(batch_size=BATCH_SIZE)


### Show a batch of training images

In [None]:
def show_batch(image_batch, label_batch):
  columns = 6
  rows = BATCH_SIZE / columns + 1  
  plt.figure(figsize=(10, 2 * rows))
  for n in range(BATCH_SIZE):
      ax = plt.subplot(rows, columns, n+1)
      plt.imshow((image_batch[n]))
      plt.title(arrayFolderNames[label_batch[n]==1][0])
      plt.axis('off')
        
        
image_batch, label_batch = next(iter(dataset))        
show_batch(image_batch, label_batch.numpy())

### Build a simple network

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import metrics
from tensorflow.keras.optimizers import Adam

def cnn55D3L2FC(classCount, imgSize, channels):
    model = Sequential()
    
    model.add(Conv2D(64, (5, 5), padding='same',
                     input_shape=(imgSize, imgSize, channels),
                     activation='relu'))                     
    model.add(Conv2D(64, (5, 5), activation='relu') ) 
    model.add(MaxPooling2D(pool_size=(2, 2)))    
    model.add(Conv2D(64, (3, 3), activation='relu') )   
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(classCount, activation='softmax'))

    
    opt = Adam(lr=0.001)
    model.compile(optimizer = opt, loss='categorical_crossentropy', metrics=[ metrics.categorical_accuracy])
    return model

model = cnn55D3L2FC(43, 32, 3)


### Draw a diagram of the network

This requires installing some packages, namely graphviz

In [None]:
tf.keras.utils.plot_model(model, 'multi_input_and_output_model.png', show_shapes=True)

### Display a table with model information

When building a model kee an eye on the number of trainable parameters. Try to keep it below 10 million

In [None]:
print(model.summary())

### Perform a random split to create a validation set

Note that due to the way the GTSRB is build (using video sequences) this is not an ideal approach. Try to partition the set manually, selecting a few sequences for validation purposes. Load the training and validation sets independently.

In [None]:
train_size = int(0.8 * dataset_length)
val_size = int(0.2 * dataset_length)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(val_size)

### Train the network 

In [None]:
history = model.fit(dataset, steps_per_epoch = 0.8*dataset_length/BATCH_SIZE,
          epochs=20, validation_data = val_dataset, validation_steps= 0.2*dataset_length/BATCH_SIZE)

### Plot the training history

In [None]:
print(history.history.keys())

# summarize history for accuracy
plt.plot(history.history['categorical_accuracy'])
plt.plot(history.history['val_categorical_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

### Evaluate the model on the test set

This is the accuracy number that really matters

In [None]:
model.evaluate(testset)