# Traffic Sign Recognition
The goal of this work is to build a classifier for traffic sign recognition.
The dataset consist of 50 000 images of traffic sign in different light condition
Angle, resolution, luminosity.

## Dataset
* Total = 50000 
* Class = 43

## Loading the data
In this section we will load the data. Note The data are in two format.
* The extracted feature using Histogram Oriented Gradien in 3 Format HOG1, HOG2, HOG3
* The actual image with the region of of interest in a csv file

## Step 1 : Load the data

In [97]:
#Importing necessary library
import matplotlib.pyplot as plt
import csv
import cv2 as cv

import glob
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# function for reading the images
# arguments: path to the traffic sign data
# returns: list of images, list of corresponding labels 

def readTrafficSigns(rootpath):
    '''Reads traffic sign data 
    Arguments: path to the traffic sign data
    Returns:   list of images, list of corresponding labels'''
    images = [] # images
    labels = [] # corresponding labels
    # loop over all 42 classes
    for c in range(0,43):
        prefix = rootpath + '/' + format(c, '05d') + '/' # subdirectory for class
        gtFile = open(prefix + 'GT-'+ format(c, '05d') + '.csv') # annotations file
        gtReader = csv.reader(gtFile, delimiter=';') # csv parser for annotations file

        next(gtReader) # skip header
        # loop over all images in current annotations file
        for row in gtReader:
            images.append(plt.imread(prefix + row[0])) # the 1th column is the filename
            labels.append(row[7]) # the 8th column is the label
        gtFile.close()
    return images, labels

In [98]:
#Read traffic test sign
def readTestTrafficSigns(rootpath):
    """Read traffic sign test data 
    Arguments: path to the test folder
    Returns: list of images and list of the label"""
    X_test = []
    y_test = []
    gtFile = open(rootpath+'/GT-final_test.test.csv')
    gtReader = csv.reader(gtFile, delimiter=';')
    
    next(gtReader)
    # loop over the row and low the image
    for row in gtReader:
        X_test.append(plt.imread(rootpath+'/'+row[0]))
        y_test.append(row[6])
    gtFile.close()
    return X_test, y_test

In [99]:

def readHogTrafficSigns(rootpath):
    ''' Read HOG of traffic sign image
    Arguments: path to the traffic sign hog data
    Returns: list of HOG Array, list of corresponding labels'''
    X_train = [] #list of hog
    y_train = [] #list of labels
    #loop over all 43 classes
    for c in range(0,42):
        prefix = rootpath + '/' + format(c, '05d') + '/' # subdirectory for class
        for file in glob.glob(prefix + '/*.txt'):
            
            X_train.append(np.loadtxt(file))
            y_train.append(c)
    return np.asarray(X_train), np.asarray(y_train)

In [100]:
def readTrafficHogTest(rootpath):
    '''Reads traffic sign test data 
    Arguments: path to the traffic sign data
    Returns:   list of images, list of corresponding labels'''
    X_test = [] # images
    y_test = [] # corresponding labels
    
    #Readinf the file with label and class 
    gtFile = open('./Database/BaseDeTest/Test_VeriteTerrain(GT).csv') # annotations file
    # Read File 
    gtReader = csv.reader(gtFile, delimiter=';') # csv parser for annotations file
    next(gtReader) # skip header
    for row in gtReader:
        name = row[0].split('.')[0]+'.txt'
        filename = rootpath+name
        X_test.append(np.loadtxt(filename))
        y_test.append(row[7])
    gtFile.close()
    return np.asarray(X_test), np.asarray(y_test)

In [101]:
#X_test, y_test = readTestTrafficSigns('./Database/BaseDeTest/Test_Images/Images')
# Read train images features HOG
X_train, y_train = readHogTrafficSigns('./Database/BaseDApprentissage/Training_HOG/HOG/HOG_02')


## Step 2 : Data exploration
The Data is already split into test and train 

In [102]:
X_train.shape

(38969, 1568)

In [103]:
X_test, y_test = readTrafficHogTest('./Database/BaseDeTest/Test_HOG/HOG_02/')

In [104]:
#Import Random Forest Model
rfc=RandomForestClassifier(n_estimators=200, verbose=1)

In [105]:
rfc.fit(X_train,y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed: 11.1min finished


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=1, warm_start=False)

In [106]:
y_pred = rfc.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:    2.3s finished


In [107]:
y_test = y_test.astype(int)

In [108]:

# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9576405384006335


In [109]:
from sklearn import datasets, svm, metrics

In [110]:
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
#fit to the trainin data
classifier.fit(X_train,y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [112]:
#Predict the test data with the pretrainned classifier
y_predsvm = classifier.predict(X_test)

In [114]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_predsvm))

Accuracy: 0.8629453681710214


In [115]:
print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(y_test, y_predsvm)))

Classification report for classifier SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        60
           1       0.82      0.85      0.84       720
           2       0.72      0.95      0.82       750
           3       0.97      0.69      0.81       450
           4       0.94      0.95      0.95       660
           5       0.71      0.78      0.74       630
           6       0.68      0.77      0.72       150
           7       0.88      0.86      0.87       450
           8       0.91      0.83      0.87       450
           9       0.94      0.99      0.96       480
          10       0.99      0.97      0.98       660
          11       0.61      0.97      0.75       420
          1

  _warn_prf(average, modifier, msg_start, len(result))


In [116]:
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_predsvm))


Confusion matrix:
[[  0  41   4 ...   0   0   0]
 [  0 614  94 ...   0   0   0]
 [  0   5 715 ...   0   0   0]
 ...
 [  0  10   0 ...  73   0   0]
 [  0   0   2 ...   0  37   0]
 [  0   0   5 ...   2  19   0]]


## CNN
