# Question 4 

### Use the German Traffic Sign Recognition dataset in the pickle files to determine the number of traffic signs in them. Prepare the dataset to train a CNN (3 convolutional layers and 2 dense layers).

#### Load and analyse data  

In [1]:
# import the common libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

In [2]:
# import necessary neural network libraries

import tensorflow as tf 
import cv2
import os
import warnings
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.optimizers import Adam
import pickle as pkl

In [3]:
# Paths of the pickled training, validation and test files.
training_file = 'train.p'
validation_file = 'valid.p'
testing_file = 'test.p'


def _read_pickle(path):
    """Open `path` in binary mode and return the unpickled object.

    NOTE: unpickling can execute arbitrary code, so this should only be
    used on files that come from a trusted source.
    """
    with open(path, mode='rb') as handle:
        return pkl.load(handle)


train_data = _read_pickle(training_file)
valid_data = _read_pickle(validation_file)
test_data = _read_pickle(testing_file)

In [4]:
# Each loaded object is indexed by 'features' (inputs) and 'labels' (targets).
X_train = train_data['features']
y_train = train_data['labels']

X_valid = valid_data['features']
y_valid = valid_data['labels']

X_test = test_data['features']
y_test = test_data['labels']

# Report the feature-array shapes in the order: train, test, validation.
for split_features in (X_train, X_test, X_valid):
    print(split_features.shape)

(30120, 32, 32, 3)
(10950, 32, 32, 3)
(3810, 32, 32, 3)


#### Prepare dataset for training a CNN

In [5]:
from sklearn.utils import shuffle

In [6]:
# Shuffle every split; passing features and labels together keeps each
# sample aligned with its label.
# A fixed random_state makes the resulting order identical on every
# Restart & Run All instead of changing from run to run.
SHUFFLE_SEED = 42

X_train, y_train = shuffle(X_train, y_train, random_state=SHUFFLE_SEED)
X_valid, y_valid = shuffle(X_valid, y_valid, random_state=SHUFFLE_SEED)
X_test, y_test = shuffle(X_test, y_test, random_state=SHUFFLE_SEED)

In [7]:
# Normalisation: centre on the mean and scale by the (max - min) range.
#
# The statistics are taken from the training split only, and are computed
# *before* X_train is overwritten. The same two numbers are then applied to
# the validation and test splits, so all three splits go through an identical
# transform and nothing from the held-out data leaks into preprocessing.
train_mean = X_train.mean()
# float() avoids doing the subtraction in the array's own (possibly narrow
# integer) dtype.
train_range = float(np.max(X_train)) - float(np.min(X_train))

X_train = (X_train - train_mean) / train_range
X_valid = (X_valid - train_mean) / train_range
X_test = (X_test - train_mean) / train_range

In [8]:
# One-hot encode the label vectors of all three splits.
# The class count matches the width of the model's final softmax layer.
NUM_CLASSES = 43

y_train, y_test, y_valid = (
    to_categorical(labels, NUM_CLASSES)
    for labels in (y_train, y_test, y_valid)
)

#### CNN Model  

In [9]:
# Define the CNN: a Sequential stack of 3 convolutional stages followed by
# 2 dense layers. Each convolutional stage is Conv2D -> 2x2 max-pooling ->
# dropout (25%).

model = Sequential()

# Stage 1: 64 filters of size 5x5 on the 32x32x3 input.
model.add(Conv2D(filters = 64, kernel_size = (5, 5), input_shape=(32, 32, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

# Stage 2: 32 filters of size 5x5.
model.add(Conv2D(filters = 32, kernel_size = (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

# Stage 3: 32 filters of size 3x3.
model.add(Conv2D(filters = 32, kernel_size = (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

# Classifier head: flatten, one hidden dense layer with 50% dropout, then a
# 43-way softmax that matches the 43-column one-hot labels.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(43, activation='softmax'))


# Categorical cross-entropy pairs with the one-hot targets and softmax output.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 64)        4864      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 10, 32)        51232     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 32)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 32)          9

In [None]:
# Show the entries of the current working directory.
working_dir_entries = os.listdir()
print(working_dir_entries)

In [None]:
from keras.callbacks import ModelCheckpoint

In [None]:
# Save the model to a file whose name carries the epoch number.
# The epoch is zero-padded to two digits ("lenet_german01.h5", ...).
# The previous "{epoch:10d}" spec padded with spaces to width 10, which
# produced file names containing a run of blanks.
checkpoint = ModelCheckpoint('lenet_german{epoch:02d}.h5')

# Train for 10 epochs with mini-batches of 400 samples and evaluate on the
# dedicated validation split after each epoch.
# validation_split is omitted: the validation set is passed explicitly via
# validation_data, and 0.0 was only restating the default.
history = model.fit(
    X_train,
    y_train,
    batch_size=400,
    epochs=10,
    validation_data=(X_valid, y_valid),
    callbacks=[checkpoint],
    shuffle=True,  # boolean flag; previously written as the integer 1
)

In [None]:
def _plot_history_pair(figure_id, train_key, val_key, train_label, val_label, title, y_label):
    """Draw one training curve and one validation curve from `history`.

    `train_key` / `val_key` select the series in `history.history`; the
    remaining arguments are the legend labels, figure title and y-axis label.
    """
    figure = plt.figure(figure_id)
    axes = figure.gca()
    axes.plot(history.history[train_key], label=train_label)
    axes.plot(history.history[val_key], label=val_label)
    axes.set_title(title)
    axes.set_xlabel('epochs')
    axes.set_ylabel(y_label)
    axes.legend()
    plt.show()


# Accuracy per epoch (training vs. validation).
_plot_history_pair(0, 'accuracy', 'val_accuracy',
                   'training accuracy', 'val accuracy',
                   'Accuracy', 'accuracy')

# Loss per epoch (training vs. validation).
_plot_history_pair(1, 'loss', 'val_loss',
                   'training loss', 'val loss',
                   'Loss', 'loss')

In [None]:
# performance measure 
from sklearn.metrics import accuracy_score

# Evaluate on the test split. With the model compiled with
# metrics=['accuracy'], index 0 of the result is the loss and index 1 the
# accuracy.
scores = model.evaluate(x=X_test, y=y_test, verbose=0)
test_accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_percent)

In [None]:
# First entry of the evaluation result is the test loss.
test_loss = scores[0]
print("Loss: %.4f" % test_loss)