In [1]:
import cv2
import numpy as np 
import os

import tensorflow as tf

from sklearn.model_selection import train_test_split

import traffic

In [2]:
# Notebook-wide settings.
# NOTE(review): EPOCHS and NUM_CATEGORIES are not referenced in the cells shown
# here; presumably they mirror the settings of the `traffic` module — confirm.
EPOCHS = 10
NUM_CATEGORIES = 43

# Image dimensions: 30 x 30 pixels x 3 colour values gives the 2700 features
# per sample seen once the images are flattened.
IMG_WIDTH, IMG_HEIGHT = 30, 30

# NOTE(review): the train/test splits below pass test_size=0.2 literally and do
# not read this constant — confirm which value is intended.
TEST_SIZE = 0.4

In [3]:
# Read a single sample image in colour mode and display the raw pixel array
# to inspect its values.
sample_path = './gtsrb/0/00000_00000.ppm'
img = cv2.imread(sample_path, cv2.IMREAD_COLOR)
img

array([[[ 80,  78,  75],
        [ 78,  76,  74],
        [ 84,  87,  86],
        ...,
        [ 75,  75,  68],
        [ 68,  69,  65],
        [ 66,  67,  66]],

       [[ 86,  84,  83],
        [ 82,  80,  80],
        [ 84,  89,  90],
        ...,
        [ 78,  77,  73],
        [ 75,  78,  76],
        [ 78,  80,  80]],

       [[ 80,  78,  78],
        [ 86,  85,  86],
        [ 91,  90,  93],
        ...,
        [ 72,  74,  72],
        [ 69,  74,  73],
        [ 74,  78,  78]],

       ...,

       [[139, 134, 133],
        [132, 127, 122],
        [124, 121, 112],
        ...,
        [ 89,  94,  94],
        [ 91,  98,  97],
        [ 99, 103,  99]],

       [[ 99,  95,  91],
        [103,  98,  91],
        [ 89,  85,  74],
        ...,
        [ 95, 101, 103],
        [104, 113, 110],
        [ 98, 104,  96]],

       [[ 87,  84,  85],
        [107, 101,  95],
        [ 79,  74,  61],
        ...,
        [ 95, 102, 102],
        [ 90, 102,  99],
        [ 89,  97,  90]]

In [4]:
# Optional interactive preview of the sample image, left disabled.
# NOTE(review): cv2.imshow opens a native GUI window and waitKey(0) waits for a
# key press, which is presumably why this stays commented out inside a
# notebook — confirm, or delete the cell.
# cv2.imshow('image', img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [5]:
# Load the reduced dataset through the project's loader.
images, labels = traffic.load_data('gtsrb-small')

# Report the number of images, then the number of labels, one per line
# (the two counts are expected to match).
print(f'Total: {len(images)}', len(labels), sep='\n')

Total: 840
840


## Benchmark logistic regression model
### Small dataset

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Stack the loaded images into one array, then flatten every image into a
# single feature row (one row per sample).
X_raw = np.array(images)
n_samples = X_raw.shape[0]
X = X_raw.reshape(n_samples, -1)
X.shape

(840, 2700)

In [8]:
# Prepare y: convert the loaded labels into a NumPy array and display its
# shape, which should show one label per row of X.
y = np.array(labels)
y.shape

(840,)

In [9]:
# Rescale the flattened features by 1/255 (maps pixel values into [0, 1],
# assuming 8-bit images — TODO confirm against traffic.load_data).
# X is rebuilt from X_raw rather than from itself (`X = X/255`), so re-running
# this cell is idempotent and can never divide the data by 255 a second time.
X = X_raw.reshape((X_raw.shape[0], -1)) / 255

In [10]:
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [11]:
# 10-fold cross-validation; shuffling with a fixed seed keeps the folds
# repeatable from run to run.
cv = KFold(n_splits=10, random_state=123, shuffle=True)
# Baseline classifier; verbose=1 makes each fit log its progress.
model = LogisticRegression(verbose=1)
# Score accuracy on every fold. error_score='raise' (matching the
# large-dataset run) surfaces a failing fit immediately instead of recording
# NaN for that fold, which would silently turn the reported mean into NaN.
scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=cv, error_score='raise')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_j

In [12]:
# Summarise the cross-validation run: mean accuracy over the folds and the
# standard deviation of the per-fold accuracies.
summary = f'Accuracy: {np.mean(scores):.3f} with std: {np.std(scores):.3f}'
print(summary)

Accuracy: 0.999 with std: 0.004


### Large dataset

In [13]:
# Load the full dataset; this rebinds `images` and `labels`, replacing the
# small dataset loaded earlier.
images, labels = traffic.load_data('gtsrb')

# Report the number of images, then the number of labels, one per line.
print(f'Total: {len(images)}', len(labels), sep='\n')

Total: 26640
26640


In [14]:
# Build the feature matrix: stack the images, flatten each one into a single
# row, and scale by 1/255 in the same expression (maps pixel values into
# [0, 1], assuming 8-bit images — TODO confirm).
X_raw = np.array(images)
X = X_raw.reshape((X_raw.shape[0], -1)) / 255

# Labels as an array aligned with the rows of X. The former bare `y.shape`
# statement was removed: in the middle of a cell it displays nothing.
y = np.array(labels)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Print the shape of every split, in the same order as before.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(21312, 2700)
(5328, 2700)
(21312,)
(5328,)


In [15]:
# Ten shuffled folds, seeded so the partition is the same on every run.
cv = KFold(n_splits=10, shuffle=True, random_state=123)

# Baseline classifier with progress logging enabled for each fit.
model = LogisticRegression(verbose=1)

# Per-fold accuracy on the training portion; a fold that fails to fit raises
# instead of being scored.
scores = cross_val_score(
    model,
    X_train,
    y_train,
    cv=cv,
    scoring='accuracy',
    error_score='raise',
)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   49.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   52.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   54.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   50.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   50.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   56.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_j

In [16]:
# Summarise the large-dataset cross-validation: mean accuracy over the folds
# and the standard deviation of the per-fold accuracies.
summary = f'Accuracy: {np.mean(scores):.3f} with std: {np.std(scores):.3f}'
print(summary)

Accuracy: 0.926 with std: 0.005


In [17]:
# Display the individual per-fold accuracies behind the summary above.
scores

array([0.9282364 , 0.934803  , 0.92679493, 0.9221023 , 0.92820272,
       0.92867198, 0.91834819, 0.93007977, 0.92397935, 0.91787893])