# Project: Build a Traffic Sign Recognition Program

[//]: # (Image References)
[step-1-lenet-best]: ./images/step-1-lenet-best.png "LeNet - best results"
[step-1-lenet-t2]: ./images/step-1-lenet-t2.png "LeNet - graph with best results"

Overview
---

In this project we are going to train a neural network to recognize traffic signs.\
The traffic sign images come from the [German Traffic Sign Dataset](http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset), and this dataset is used to train the network.\
\
To track the results of all training experiments I use the [Neptune.ml](http://neptune.ml) tool.

## Step 0: Load The Data

At the beginning we need to load the dataset. The dataset is kept in the `data` folder and contains the following files:
```bash
total 311760
-rw-r--r--@ 1 grzegorz.tyminski  staff   38888118 Nov  7  2016 test.p
-rw-r--r--@ 1 grzegorz.tyminski  staff  107146452 Feb  2  2017 train.p
-rw-r--r--@ 1 grzegorz.tyminski  staff   13578712 Feb  2  2017 valid.p
```
The class that loads this dataset is implemented in the file [traffic_sign_dataset.py](traffic_sign_dataset.py).

In [None]:
from traffic_sign_dataset import TrafficData

# Initiate and load the dataset (the loader lives in traffic_sign_dataset.py).
dataset = TrafficData()

# Normalize the dataset --> change values of pixels from 0..255 to 0..1
dataset.normalize_data()
# Randomize the order of images in the dataset
dataset.shuffle_dataset()

# Pull out the three splits as notebook-level variables used by the cells below.
X_train, y_train = dataset.get_training_dataset()
X_valid, y_valid = dataset.get_validation_dataset()
X_test, y_test = dataset.get_testing_dataset()



In [None]:
# Basic summary of the loaded dataset splits.

# Number of training examples
n_train = len(X_train)

# Number of validation examples
n_validation = len(X_valid)

# Number of testing examples
n_test = len(X_test)

# Shape of a single traffic sign image.
# NOTE(review): hard-coded rather than read from the data; presumably
# (height, width, colour channels) of the un-normalized images - confirm.
image_shape = [32, 32, 3]

# Number of unique classes/labels in the dataset (hard-coded; later cells use
# it as the histogram bin count).
n_classes = 43

print("Number of training examples =", n_train)
# The validation count was computed but never reported before.
print("Number of validation examples =", n_validation)
print("Number of testing examples =", n_test)
print("Image data shape =", image_shape)
print("Number of classes =", n_classes)

In [None]:
dataset.preview_random()

In [None]:
import matplotlib.pyplot as plt

# One wide figure with the label histograms of all three splits overlaid;
# each split gets n_classes bins and is drawn half-transparent.
plt.figure(figsize=(16, 4))
plt.title("Number of images per class in dataset")

for split_name, split_labels in (('train', y_train), ('test', y_test), ('valid', y_valid)):
    plt.hist(split_labels, bins=n_classes, alpha=0.5, label=split_name)

plt.legend()

In [None]:
# Print the label text of a hand-picked selection of class ids, one per line.
for class_id in (0, 7, 16, 19, 24, 27, 29, 32, 37, 39, 41):
    print(dataset.label_for(class_id))

## Step 1: Train same LeNet as in MNIST example (v1)

In [None]:
from LeNet import LeNet
import tensorflow as tf

lenet = LeNet(output_classes=43)

In [None]:
lenet.set_hiperparams(epochs=10, batch_size=64, learn_rate=0.002)

In [None]:
lenet.train(dataset)

In [None]:
lenet.model_summary()

## Step 2: Train modified LeNet with dropouts (v2)

In [None]:
from LeNet2 import LeNet2
import tensorflow as tf

lenet = LeNet2(output_classes=43)

In [None]:
lenet.set_hiperparams(epochs=10, batch_size=64, learn_rate=0.002, dropout_val=0.25)

In [None]:
lenet.train(dataset)

In [None]:
lenet.model_summary()

## Step 3: Improve dataset normalization

- improve brightness
- to grayscale

... and training __with__ Dropout

In [None]:
from traffic_sign_dataset import TrafficData

# Initiate and load a fresh copy of the dataset.
dataset = TrafficData()

# Normalize the dataset --> change values of pixels from 0..255 to 0..1,
# this time with the brightness and grayscale options switched on.
dataset.normalize_data(brightness=True, grayscale=True)
# Randomize the order of images in the dataset
dataset.shuffle_dataset()

# Rebind the notebook-level split variables to the newly normalized data.
X_train, y_train = dataset.get_training_dataset()
X_valid, y_valid = dataset.get_validation_dataset()
X_test, y_test = dataset.get_testing_dataset()



In [None]:
dataset.preview_random(grayscale=True)

In [None]:
import matplotlib.pyplot as plt

# Overlay the label histograms of the three splits on one wide figure.
# n_classes (the bin count) comes from an earlier cell of the notebook.
plt.figure(figsize=(16, 4))
plt.title("Traffic Signs classes' distribution")

for split_name, split_labels in (('train', y_train), ('test', y_test), ('valid', y_valid)):
    plt.hist(split_labels, bins=n_classes, alpha=0.5, label=split_name)

plt.legend()

In [None]:
from LeNet3 import LeNet3
import tensorflow as tf

lenet = LeNet3(output_classes=43)

In [None]:
# NOTE(review): the Step 3 text says this run trains *with* dropout, yet
# dropout_val is 1.0 here (Step 2 used 0.25). If dropout_val is a keep
# probability, 1.0 disables dropout entirely - confirm against LeNet3.
lenet.set_hiperparams(epochs=15, batch_size=64, learn_rate=0.002, dropout_val=1.0)

In [None]:
lenet.train(dataset)

In [None]:
lenet.model_summary()

## Step 4: Dataset with normalization as before

... but training __without__ Dropout

In [None]:
from LeNet4 import LeNet4
import tensorflow as tf

lenet = LeNet4(output_classes=43, channels=1)

In [None]:
lenet.set_hiperparams(epochs=10, batch_size=64, learn_rate=0.002)

In [None]:
lenet.train(dataset)

In [None]:
lenet.model_summary()

In [None]:
from traffic_sign_dataset import TrafficData

# Initiate and load a fresh copy of the dataset before the second Step 4 run.
dataset = TrafficData()

# Normalize the dataset --> change values of pixels from 0..255 to 0..1,
# with the brightness and grayscale options switched on (same as Step 3).
dataset.normalize_data(brightness=True, grayscale=True)
# Randomize the order of images in the dataset
dataset.shuffle_dataset()

# Rebind the notebook-level split variables; X_test / y_test are used by the
# misclassification analysis further down.
X_train, y_train = dataset.get_training_dataset()
X_valid, y_valid = dataset.get_validation_dataset()
X_test, y_test = dataset.get_testing_dataset()



In [None]:
from LeNet4 import LeNet4
import tensorflow as tf

lenet = LeNet4(output_classes=43, channels=1)

In [None]:
lenet.set_hiperparams(epochs=10, batch_size=4, learn_rate=0.001)

In [None]:
lenet.train(dataset)

### Let's check on the `test` dataset which classes the model has trouble recognizing correctly

In [None]:
import numpy as np
n_classes = 43

# Predictions for the whole test set, indexed like X_test / y_test.
# NOTE(review): predictions are compared against dataset.label_for(...), so
# this assumes lenet.predict returns label names rather than class ids - confirm.
result = lenet.predict(X_test)

# wrong[c] collects (test index, predicted label) pairs for every
# misclassified test image whose true class id is c. Tuples keep the two
# values ordered and distinct (a set would not).
wrong = [[] for _ in range(n_classes)]

for ind, (cls_no, pred_label) in enumerate(zip(y_test, result)):
    label = dataset.label_for(cls_no)
    if pred_label != label:
        wrong[cls_no].append((ind, pred_label))

counter = sum(len(cls_list) for cls_list in wrong)
print('wrong predictions total - ', counter)
print()
print('wrong predictions per class')
for cls_no in range(n_classes):
    # Look the label up by class id; indexing y_test with the class id would
    # name the row after the class of an unrelated test sample.
    label = dataset.label_for(cls_no)
    counter = len(wrong[cls_no])
    print(label, "=", counter)

## Step 5 - augment dataset

In [None]:
from traffic_sign_dataset import TrafficData

# Initiate and load a fresh copy of the dataset.
dataset = TrafficData()
# Augment dataset to get 2x bigger dataset
dataset.augment_dataset()
# Augment dataset again to get 4x bigger dataset
dataset.augment_dataset()

# Normalize the dataset --> change values of pixels from 0..255 to 0..1.
# Unlike Steps 3 and 4, the brightness option is switched off here; grayscale
# stays on. Augmentation runs before normalization in this cell.
dataset.normalize_data(brightness=False, grayscale=True)
# Randomize the order of images in the dataset
dataset.shuffle_dataset()

# Rebind the notebook-level split variables to the augmented data.
X_train, y_train = dataset.get_training_dataset()
X_valid, y_valid = dataset.get_validation_dataset()
X_test, y_test = dataset.get_testing_dataset()



In [None]:
from LeNet4 import LeNet4
import tensorflow as tf

lenet = LeNet4(output_classes=43, channels=1)

In [None]:
lenet.set_hiperparams(epochs=10, batch_size=32, learn_rate=0.002)

In [None]:
# Grid search: train once for every (batch size, learning rate) combination,
# passing the 'augmentation' tag to train() for each run.
# NOTE(review): the same `lenet` instance is reused for all eight runs -
# confirm that train() re-initialises the weights; otherwise each run
# continues from the previous run's weights and the results are not comparable.
for batch_size in [8, 16, 32, 64]:
    for learn_rate in [0.001, 0.002]:
        lenet.set_hiperparams(epochs=10, batch_size=batch_size, learn_rate=learn_rate)
        lenet.train(dataset, neptune_tags=['augmentation'])

----------------

## Step 6 - verify model on random images from internet

Let's first download some images and save them as `input/sign_1.png` … `input/sign_10.png`.

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from scipy.special import softmax
import pprint
from traffic_sign_dataset import TrafficData
from graph import Graph
dataset = TrafficData()

# Classify the ten images input/sign_1.png .. input/sign_10.png with the
# `lenet` model left over from the previous cells and print the three most
# likely classes for each image.
for index in range(1, 11):
    print("------------------------------")
    image_name = f"input/sign_{index}.png"
    orig_image = mpimg.imread(image_name)
    # NOTE(review): this uses brightness=True while the Step 5 training cell
    # normalized with brightness=False - confirm the preprocessing matches
    # the model that is being evaluated.
    image = dataset.__normalize_image__(orig_image, brightness=True, grayscale=True)

    # `result` is indexed as a 2-D array below (row 0 = this image).
    result = lenet.predict_image(image)
    # Normalize along the class axis so each row sums to 1 on its own.
    result_softmax = softmax(result, axis=-1)
    # Class ids sorted by descending score, truncated to the best three.
    result_sorted = np.argsort(result)[:, ::-1]
    result_top3 = result_sorted[:, :3]

    print(f"File {image_name}")
    plt.figure(figsize=(2, 2))
    plt.imshow(orig_image, cmap="gray")
    plt.show()

    for i, classid in enumerate(result_top3[0]):
        print(f"TOP{i}(prob={result_softmax[0,classid]:0.2f}): {dataset.label_for(classid)} (classid={classid}) ")


In [None]:
import random
def preview_class(classid: int = 1, max_cells: int = 10):
    """Show a row of randomly chosen training images that belong to one class.

    Reads the notebook-level ``X_train`` / ``y_train`` arrays. Every picked
    image is reshaped to 32x32 and drawn with a gray colour map, so the
    function expects the single-channel (grayscale) version of the dataset.

    Args:
        classid: Class id whose images are previewed.
        max_cells: Upper bound on the number of images shown. Fewer images
            are shown when the class has fewer training examples.

    Raises:
        ValueError: If ``max_cells`` is smaller than 1 or the training set
            contains no image of class ``classid``.
    """
    if max_cells < 1:
        raise ValueError(f"max_cells must be at least 1, got {max_cells}")

    indexes = np.where(y_train == classid)[0]
    if len(indexes) == 0:
        raise ValueError(f"no training images found for class {classid}")

    # Never ask for more images than the class actually has.
    cells = min(max_cells, len(indexes))
    # random.sample needs a sequence; passing a set fails on Python 3.11+.
    picked = random.sample(indexes.tolist(), cells)

    # squeeze=False keeps `axes` two-dimensional even when only one cell is drawn.
    _, axes = plt.subplots(1, cells, sharey=True, figsize=(12, 12), squeeze=False)
    for axis, index in zip(axes[0], picked):
        img = np.reshape(X_train[index], [32, 32])
        axis.axis('off')
        axis.imshow(img, cmap='gray')

    plt.tight_layout()
    plt.show()

preview_class(16)

----------------