# Traffic sign classifier

## Machine Learning Project: Build a Traffic Sign Classifier

---
**Disclaimer**

This project aims to be a study and a reworking of [this](https://github.com/lfiaschi/udacity-traffic-sign-classifier) notebook.

In [1]:
import cv2 # resize the images
import numpy as np
import pandas as pd
import os # to work with directories
from random import shuffle # shuffle data


# Root folder of the image dataset; it is the path handed to load_dataset,
# which walks its subfolders looking for images.
DATASET_DIR = '/datasets/GTSRB/Images/'
# CSV file read with pandas to build the table of sign names indexed by class id.
ANNOTATION_FILE = './signnames.csv'



# IMAGE_SIZE = 50
# LR = 1e-3
# MODEL_NAME = 'trafficsigns--{}--{}.model'.format(LR, "2conv-basic")

Load the csv file with the annotations

In [2]:
# Read the annotation table and index it by the ClassId column, so that a
# row can be looked up directly by its numeric class id.
signnames = pd.read_csv(ANNOTATION_FILE).set_index('ClassId')
# Show the first five rows as a quick sanity check
print(signnames.head(5))

def get_name_from_label(label):
    """Look up the sign name associated with a numeric class id.

    The lookup is performed on the module-level ``signnames`` table: the
    row whose index equals ``label`` is selected and the value of its
    ``SignName`` column is returned.

    :param label: the numeric class id
    :type label: int
    :returns: the textual name of the class

    :Example:

    >>> get_name_from_label(0)
    'Speed limit (20km/h)'
    """
    # Select the row for this class id first, then read its name
    row = signnames.loc[label]
    return row.SignName

                     SignName
ClassId                      
0        Speed limit (20km/h)
1        Speed limit (30km/h)
2        Speed limit (50km/h)
3        Speed limit (60km/h)
4        Speed limit (70km/h)


The images are divided into folders based on their category. The *load_dataset* function creates a list of all the images, each labeled with the name of its folder.

In [26]:

def load_dataset(path):
    """Load the ``.ppm`` images found under ``path`` together with their labels.

    The directory tree rooted at ``path`` is walked recursively. Every file
    with a ``.ppm`` extension is read as a grayscale image and labeled with
    the name of the folder that directly contains it, e.g. an image stored
    in a folder named ``00035`` gets the label ``'35'``.

    :param path: the root folder where the images, divided into one folder
        per class, are stored
    :returns: a list with one ``[image, label]`` pair per image; both items
        are numpy arrays, the label being a 0-d array holding the class id
        as a string without leading zeros
    :raises ValueError: if a ``.ppm`` file is stored in a folder whose name
        is not an integer
    """
    dataset = []
    for subdir, _dirs, files in os.walk(path):  # every folder under path
        for file in files:  # one image at a time
            _, file_extension = os.path.splitext(file)
            if file_extension != '.ppm':
                continue  # not an image of the dataset

            # The label is the name of the containing folder; going through
            # int() removes the zeros ahead of the number ('00035' -> 35).
            label = int(os.path.basename(subdir))

            # Build the path from the folder currently being walked, so the
            # image is found even when that folder is nested more than one
            # level below ``path``.
            img_path = os.path.join(subdir, file)

            # NOTE(review): cv2.imread returns None instead of raising when
            # it cannot read a file; such an entry is stored unchanged.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            dataset.append([np.array(img), np.array(str(label))])

    return dataset

In [27]:
# Load every image found under DATASET_DIR and report how many there are
dataset = load_dataset(DATASET_DIR)
print("dataset cardinality : {}".format(len(dataset)))

# print the first entries of the dataset as numpy arrays (is this right?)
print(dataset[:10])


dataset cardinality : 39209
[[array([[ 31,  31,  30, ...,  94, 106, 110],
       [ 30,  29,  28, ..., 127, 114, 130],
       [ 30,  27,  30, ..., 134, 123, 132],
       ..., 
       [ 88,  80, 128, ..., 116,  90,  86],
       [ 89,  79, 127, ..., 107,  87,  95],
       [ 87,  78, 123, ...,  86,  83,  78]], dtype=uint8), array('35',
      dtype='<U2')], [array([[ 64,  58,  54, ..., 223, 254, 255],
       [ 45,  53,  51, ..., 193, 255, 255],
       [ 51,  48,  51, ..., 253, 255, 255],
       ..., 
       [ 77, 145, 188, ..., 255, 255, 255],
       [ 84, 216, 215, ..., 255, 255, 255],
       [ 91, 223, 229, ..., 255, 255, 255]], dtype=uint8), array('35',
      dtype='<U2')], [array([[112, 115, 114, ..., 132, 125, 105],
       [111, 110, 110, ..., 147, 130, 109],
       [114, 115, 115, ..., 161, 129, 106],
       ..., 
       [ 86,  71,  61, ...,  58,  60,  61],
       [ 89,  54,  55, ...,  58,  58,  58],
       [ 69,  55,  54, ...,  64,  67,  64]], dtype=uint8), array('35',
      dtype='<

TODO: try to obtain X_train, y_train, X_valid, y_valid to align with the original project

## Select data to create the training set and the test set

In [16]:
# training_set = dataset[:-5000] # take all the images except the last 5000
# test_set = dataset[-5000:] # take the last 5000 images
# print("training set cardinality : {}".format(len(training_set)))
# print("testset cardinality : {}".format(len(test_set)))

training set cardinality : 34252
testset cardinality : 5000
