 # Road Sign Recognition system

In [0]:
import pathlib as path
import skimage as skimg
import matplotlib.pyplot as plt
from functional import seq
from random import sample, seed
import numpy as np
from rsr import Data
from loguru import logger

logger.remove()


 # Loading dataset
 Load the data from the `/cropped/` directory, convert each directory name to an integer, and use these integers as labels

 Dataset distribution

In [0]:
def printStat(dataset, groups):
    """Print the number of samples per label and plot the counts as a bar chart.

    Each element of ``groups`` is indexed as ``(label index, members)``; the
    label index is converted to its display value with ``Data.fromIndex``.
    The total printed at the end is ``len(dataset)``.
    """
    # Text summary: one line per label, followed by the overall size.
    for entry in groups:
        name = Data.fromIndex(entry[0])
        size = len(entry[1])
        print(f'{name}: {size}')
    print(f'Total: {len(dataset)}')

    # The same numbers as a bar chart, one bar per label.
    names = seq(groups).select(lambda entry: Data.fromIndex(entry[0])).to_list()
    sizes = seq(groups).select(lambda entry: len(entry[1])).to_list()
    plt.bar(names, sizes)
    plt.show()

# Print and plot the per-label sample counts.
# NOTE(review): `dataset` and `groups` are not defined in the visible cells --
# presumably they come from the loading step described above; confirm.
printStat(dataset, groups)

 # Visualizing dataset
 Displaying 10 random samples for each label

 _Here, the random seed is set to 0_

In [0]:
# Seed Python's `random` module so the `sample(...)` draws below are reproducible.
seed(0)
# todo: rename to show list of images
def showDataset(dataset, title, cols = 10, figsize = (15, 15), y = 0.6):
    """Show the images of ``dataset`` in a grid under a common title.

    Args:
        dataset: Sized iterable of items exposing an ``image`` array attribute.
        title: Figure title, passed to ``fig.suptitle``.
        cols: Number of grid columns.
        figsize: Figure size forwarded to ``plt.subplots``.
        y: Vertical position of the title, forwarded to ``fig.suptitle``.
    """
    # Ceiling division so a partially filled last row is still displayed;
    # keep at least one row so plt.subplots never receives 0 rows.
    rows = max(1, -(-len(dataset) // cols))
    # squeeze=False always returns a 2-D array of axes, even for a 1x1 grid,
    # so .ravel() below is always available.
    fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    cells = axes.ravel()

    # Hide the frame of every cell, including unused trailing ones.
    for axe in cells:
        axe.axis('off')

    for axe, data in zip(cells, dataset):
        image = data.image
        # Decide the colormap per image so one grayscale image does not
        # change the setting used for the images that follow it.
        isGray = len(image.shape) < 3 or image.shape[-1] < 3
        axe.imshow(image, cmap="gray" if isGray else None)

    fig.suptitle(title, y=y)
    plt.show()

def sampleDataset(dataset, count=10):
    """Show up to ``count`` randomly chosen images for every label in ``dataset``.

    The dataset is grouped with ``groupDataset``; each group is indexed as
    ``(label index, members)`` and displayed in its own single-row figure
    titled with ``Data.fromIndex(label index)``.

    Args:
        dataset: The items to group and sample from.
        count: Maximum number of images to draw per label.
    """
    for group in groupDataset(dataset):
        members = group[1]
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at the group size.
        picked = min(count, len(members))
        if picked <= 0:
            # Nothing to draw for this label.
            continue
        showDataset(sample(members, picked), Data.fromIndex(group[0]), cols=picked)


In [0]:
# Display random sample images for every label (default count=10).
sampleDataset(dataset)



 # Image preprocessing

In [0]:
def minMaxAverage(title, items):
    """Print the minimum, maximum and average of ``items``, labelled with ``title``.

    ``items`` is wrapped with ``seq`` separately for each statistic.
    """
    smallest = seq(items).min()
    print(f'Minimum {title}: {smallest}')

    largest = seq(items).max()
    print(f'Maximum {title}: {largest}')

    mean = seq(items).average()
    print(f'Average {title}: {mean}')


 Average width

 Average height

 Here, we only sample 100 images for testing out the preprocessing steps

In [0]:
# validation set
# Draw 100 random items to try the preprocessing steps on; `sample` raises
# ValueError if `dataset` holds fewer than 100 items.
# NOTE(review): `sample` returns the same item objects that are in `dataset`,
# not copies -- presumably the in-place preprocessing in the following cells
# also alters those items inside `dataset`; confirm.
validationSet = sample(dataset, 100)
# Shown in 5 columns, with the title placed at y=0.9.
showDataset(validationSet, "Validation set", 5, y = 0.9)


In [0]:
# grayscale
# Call `grayscale()` on every sampled item. The return value is ignored and
# the same list is displayed afterwards -- presumably the item's image is
# converted in place (confirm in rsr.Data).
for data in validationSet:
    data.grayscale()

showDataset(validationSet, "Grayscale", 5, y = 0.9)


In [0]:
# histogram equalization
# Call `equalize()` on every sampled item. The return value is ignored and the
# same list is displayed afterwards -- presumably the item's image is
# modified in place (confirm in rsr.Data).
for data in validationSet:
    data.equalize()

showDataset(validationSet, "Histogram equalization", 5, y = 0.9)


In [0]:
# resize
# Call `resize()` with its default arguments on every sampled item; the target
# size is decided inside rsr.Data and is not visible here. The return value is
# ignored -- presumably the item's image is replaced in place (confirm).
for data in validationSet:
    data.resize()

showDataset(validationSet, "Resize", 5, y = 0.9)


In [0]:
# Image arrays are indexed (row, column[, channel]), so shape[0] is the
# height and shape[1] is the width.
minMaxAverage("width", seq(validationSet).select(lambda data: data.image.shape[1]))
minMaxAverage("height", seq(validationSet).select(lambda data: data.image.shape[0]))
# Last expression of the cell: displays the shape of every sampled image.
seq(validationSet).select(lambda data: data.image.shape)


# data augmentation? 

# sampleDataset()


In [0]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix


def train(dataset):
    """Train and evaluate an MLP classifier on the flattened images of ``dataset``.

    The data is split into training and testing parts (``random_state=0``),
    standardized, and used to fit an ``MLPClassifier``. Array shapes, value
    ranges before and after scaling, both accuracies and the confusion matrix
    of the testing part are printed. Nothing is returned.

    Args:
        dataset: Sized iterable of items exposing ``image`` and ``label``.
            Assumes all images share one shape so they stack into a single
            array -- TODO confirm against the preprocessing applied upstream.
    """
    sampleCount = len(dataset)
    # One flattened feature row per image, and a flat vector of labels.
    data = np.array([item.image for item in dataset]).reshape(sampleCount, -1)
    labels = np.array([item.label for item in dataset]).reshape(-1)

    trainingData, testingData, trainingLabels, testingLabels = train_test_split(data, labels, random_state=0)
    print(f'trainingData: {trainingData.shape}, trainingLabels: {trainingLabels.shape}')
    print(f'testingData: {testingData.shape}, testingLabels: {testingLabels.shape}')

    minMaxAverage("training", trainingData.ravel())
    minMaxAverage("testing", testingData.ravel())

    scaler = StandardScaler()
    trainingData = scaler.fit_transform(trainingData)
    # Reuse the statistics learned from the training split. Refitting on the
    # testing split would scale it differently from the data the model was
    # trained on and leak test-set statistics into the evaluation.
    testingData = scaler.transform(testingData)
    print("---")
    minMaxAverage("training", trainingData.ravel())
    minMaxAverage("testing", testingData.ravel())

    mlp = MLPClassifier(random_state=0)
    mlp.fit(trainingData, trainingLabels)
    print(mlp)
    print(f'training accuracy: {mlp.score(trainingData, trainingLabels)}')
    print(f'testing accuracy: {mlp.score(testingData, testingLabels)}')

    predictedLabels = mlp.predict(testingData)
    print(f'confusion matrix: \n {confusion_matrix(testingLabels, predictedLabels)}')

    

# Run the train/evaluate pipeline on the whole dataset.
train(dataset)


 # Normalization