In [25]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.externals import joblib
from tsr_hog_extractor import HOGExtractor
import random as rand
import numpy as np
import matplotlib.pyplot as plt
import cv2
import glob
from helpers import print_executed
from helpers import plot_learning_curve
from tqdm import tqdm
from matplotlib import pyplot as plt
import pandas as pd
import datetime
import warnings

#########################################################################
#                                                                       #
# If you use features from project files you can ommit code below       #
#                                                                       #
#########################################################################

hog_parameters = {
    'color_model': 'hsv',  # hls, hsv, yuv, ycrcb
    'svc_input_size': 64,  
    'number_of_orientations': 11,  # 6 - 12
    'pixels_per_cell': 16,  # 8, 16
    'cells_per_block': 2,  # 1, 2
    'do_transform_sqrt': True,
}

extractor = HOGExtractor(hog_parameters)
positive_images, negative_images = [], []
    
print_executed()



 [32mEXECUTED BLOCK AT 1:34:41[0m


In [26]:

# read file which describe positive dataset
positive_data_descr = pd.read_csv('dataset/positive/folders_classes.csv', delimiter=';', dtype=object)
positive_data_descr = positive_data_descr.drop('opis', axis=1)

# select only prohibitory signs which are speed limits
positive_data_descr = \
    positive_data_descr.loc[(positive_data_descr['typ'] == 'a') (positive_data_descr['numer'] == '33')]
print(positive_data_descr, '\n')

# read file which describe types of signs and select its language
sign_description = pd.read_csv('dataset/sign_types.csv', delimiter=';')
sign_description = sign_description.drop('opis', axis=1)
print(sign_description.head(5), '\n')
print_executed()


   folder typ numer
11  00011   a    6a
13  00013   a     7
18  00018   a    30
19  00019   a     1
20  00020   a     2
21  00021   a     4
22  00022   a    11
23  00023   a    15
24  00024   a   12b
25  00025   a    14
26  00026   a    29
27  00027   a    16
28  00028   a    17
29  00029   a    24
30  00030   a    32
31  00031   a   18b 

  typ    description
1   b    prohibitory
2   c      mandatory
3   d  informational 


 [32mEXECUTED BLOCK AT 1:34:51[0m


In [3]:
print("Loading negative images to memory...")

# this parameter determines if use a generator from GTSDB or manually collected images
useGenerator = True
if useGenerator:
    from model_negative_data_generator import NegativeDataGenerator
    ndg = NegativeDataGenerator()
    negative_images = ndg()
else:
    negative_paths = glob.glob('./dataset/negative/*/*.png')
    for path in tqdm(negative_paths):
        image = cv2.imread(path)
        if image.shape[0] >= 30 or image.shape[1] >= 30:
            negative_images.append(image)

print_executed()


0it [00:00, ?it/s]1it [00:00,  2.46it/s]2it [00:00,  2.81it/s]3it [00:00,  2.99it/s]4it [00:01,  3.16it/s]5it [00:01,  3.25it/s]6it [00:01,  3.31it/s]7it [00:02,  3.30it/s]8it [00:02,  3.23it/s]9it [00:02,  3.35it/s]10it [00:03,  3.10it/s]11it [00:03,  3.16it/s]12it [00:03,  2.96it/s]13it [00:04,  3.02it/s]14it [00:04,  3.14it/s]15it [00:04,  3.26it/s]16it [00:04,  3.32it/s]17it [00:05,  3.34it/s]18it [00:05,  3.28it/s]19it [00:05,  3.35it/s]20it [00:06,  3.37it/s]21it [00:06,  3.37it/s]22it [00:06,  3.36it/s]23it [00:06,  3.40it/s]24it [00:07,  2.90it/s]25it [00:07,  2.98it/s]26it [00:08,  3.01it/s]27it [00:08,  3.06it/s]28it [00:08,  3.11it/s]29it [00:08,  3.23it/s]30it [00:09,  3.26it/s]31it [00:09,  3.27it/s]32it [00:09,  3.31it/s]33it [00:10,  3.14it/s]34it [00:10,  2.83it/s]35it [00:10,  2.94it/s]36it [00:11,  2.78it/s]37it [00:11,  2.81it/s]38it [00:12,  2.67it/s]39it [00:12,  2.73it/s]40it [00:12,  2.67it/s]41it [00:13,  2.88it/s]42it 

Loading negative images to memory...
We will slice 155 negative images!
It can take a while ...
Extracted 66960 slices from negative images.

 [32mEXECUTED BLOCK AT 23:15:48[0m


In [27]:
print("Loading positive images to memory...\n")

# for each selected folder
for folder in positive_data_descr['folder']:
    # read all images from it and sort it
    paths = sorted(glob.glob('./dataset/positive/' + folder + '/*.ppm'))
    # read csv which contains bounding boxes of traffic sign
    csv = pd.read_csv('./dataset/positive/' + folder + '/GT-' + folder + '.csv', delimiter=';')
    print("\nReading folder: ", folder, '\n')
    for path in tqdm(paths):
        # read name of the image
        name = path.split('/')[4]
        # search for selected image in the csv and flatten that row to list
        row = (csv.loc[csv['Filename'] == name]).values.tolist()
        # read image and cut ROI with traffic sign, then add to list
        image = cv2.imread(path)
        if image.shape[0] >= 35 or image.shape[1] >= 35:
            image = image[row[0][3]:row[0][5], row[0][4]:row[0][6]]
            #print(image.shape)
            positive_images.append(image)

print_executed()


  0%|          | 0/1320 [00:00<?, ?it/s]  4%|▎         | 47/1320 [00:00<00:02, 466.50it/s]  8%|▊         | 103/1320 [00:00<00:02, 489.89it/s] 12%|█▏        | 154/1320 [00:00<00:02, 494.40it/s] 16%|█▌        | 212/1320 [00:00<00:02, 516.88it/s] 20%|██        | 269/1320 [00:00<00:01, 530.89it/s] 24%|██▍       | 315/1320 [00:00<00:02, 500.20it/s] 27%|██▋       | 361/1320 [00:00<00:02, 446.90it/s] 31%|███       | 404/1320 [00:00<00:02, 421.05it/s] 35%|███▍      | 461/1320 [00:00<00:01, 456.69it/s] 39%|███▉      | 517/1320 [00:01<00:01, 482.76it/s] 44%|████▎     | 576/1320 [00:01<00:01, 509.95it/s] 48%|████▊     | 632/1320 [00:01<00:01, 522.66it/s] 52%|█████▏    | 685/1320 [00:01<00:01, 521.20it/s] 57%|█████▋    | 747/1320 [00:01<00:01, 546.14it/s] 61%|██████    | 803/1320 [00:01<00:00, 540.70it/s] 65%|██████▌   | 858/1320 [00:01<00:00, 506.09it/s] 69%|██████▉   | 910/1320 [00:01<00:00, 484.28it/s] 74%|███████▎  | 971/1320 [00:01<00:00, 515.90it/s] 78%|███████▊  | 1024/1

Loading positive images to memory...


Reading folder:  00011 


Reading folder:  00013 


Reading folder:  00018 


Reading folder:  00019 


Reading folder:  00020 


Reading folder:  00021 


Reading folder:  00022 


Reading folder:  00023 


Reading folder:  00024 


Reading folder:  00025 


Reading folder:  00026 


Reading folder:  00027 


Reading folder:  00028 


Reading folder:  00029 


Reading folder:  00030 


Reading folder:  00031 


 [32mEXECUTED BLOCK AT 1:36:1[0m


In [29]:
# stack already read images to np array
# shapes of arrays can be different, because we have stack images with different shapes to one array
positive_images, negative_images = np.asarray(positive_images), np.asarray(negative_images)
total_signs, total_nonsigns = positive_images.shape[0], negative_images.shape[0]

print("Positive images shape: ", len(positive_images), "type: ", type(positive_images))
print("Negative images shape: ", len(negative_images), "type: ", type(negative_images))
print("Total positive signs: ", total_signs, "\nTotal nonsigns: ", total_nonsigns)

positive_features, negative_features = [], []
print_executed()


Positive images shape:  9893 type:  <class 'numpy.ndarray'>
Negative images shape:  1 type:  <class 'numpy.ndarray'>
Total positive signs:  9893 
Total nonsigns:  0

 [32mEXECUTED BLOCK AT 1:37:49[0m


In [30]:
print("Extracting features from traffic signs...")

for img in tqdm(positive_images):
    positive_features.append(extractor.features(img)) 

print("Extracting features from non traffic signs...")
for img in tqdm(negative_images):
    negative_features.append(extractor.features(img)) 

positive_features = np.asarray(positive_features)
negative_features = np.asarray(negative_features)

# You can safe your features to file
# np.save('trained_models/negative_features', negative_features)
# np.save('trained_models/positive_features', positive_features)

print_executed()


  0%|          | 0/9893 [00:00<?, ?it/s]  0%|          | 6/9893 [00:00<03:02, 54.10it/s]  0%|          | 14/9893 [00:00<02:48, 58.70it/s]  0%|          | 21/9893 [00:00<02:45, 59.64it/s]  0%|          | 29/9893 [00:00<02:33, 64.44it/s]  0%|          | 37/9893 [00:00<02:25, 67.71it/s]  0%|          | 45/9893 [00:00<02:21, 69.53it/s]  1%|          | 53/9893 [00:00<02:19, 70.31it/s]  1%|          | 61/9893 [00:00<02:17, 71.49it/s]  1%|          | 68/9893 [00:00<02:18, 70.99it/s]  1%|          | 76/9893 [00:01<02:17, 71.17it/s]  1%|          | 84/9893 [00:01<02:16, 72.08it/s]  1%|          | 92/9893 [00:01<02:20, 69.67it/s]  1%|          | 99/9893 [00:01<02:22, 68.49it/s]  1%|          | 107/9893 [00:01<02:20, 69.82it/s]  1%|          | 114/9893 [00:01<02:20, 69.63it/s]  1%|          | 121/9893 [00:01<02:21, 69.30it/s]  1%|▏         | 128/9893 [00:01<02:23, 67.96it/s]  1%|▏         | 136/9893 [00:01<02:23, 68.19it/s]  1%|▏         | 143/9893 [00:02<02:23, 67.82it/s]  2

Extracting features from traffic signs...
Extracting features from non traffic signs...


error: OpenCV(3.4.2) /opt/concourse/worker/volumes/live/9523d527-1b9e-48e0-7ed0-a36adde286f0/volume/opencv-suite_1535558719691/work/modules/imgproc/src/color.hpp:253: error: (-215:Assertion failed) VScn::contains(scn) && VDcn::contains(dcn) && VDepth::contains(depth) in function 'CvtHelper'


In [18]:
#########################################################################
#                                                                       #
# If you use features from project files you can start here             #
#                                                                       #
#########################################################################

positive_features = np.load('trained_models/positive_features.npy')
negative_features = np.load('trained_models/negative_features.npy')

positive_features = np.asarray(positive_features)
negative_features = np.asarray(negative_features)

# operation to balance a size of negative features
negative_features = negative_features[:positive_features.shape[0]]

total_signs = positive_features.shape[0]
total_nonsigns = negative_features.shape[0]

print_executed()


 [32mEXECUTED BLOCK AT 0:58:51[0m


In [19]:
print("Scaling features...")
print(len(positive_features), len(positive_features[0]))
print(len(negative_features), len(negative_features[0]))

unscaled_x = np.vstack((positive_features, negative_features)).astype(np.float64)
scaler = StandardScaler().fit(unscaled_x)
x = scaler.transform(unscaled_x)
y = np.hstack((np.ones(total_signs), np.zeros(total_nonsigns)))

print(" x shape: ", x.shape, " y shape: ", y.shape)

print_executed()


Scaling features...
9183 1188
9183 1188
 x shape:  (18366, 1188)  y shape:  (18366,)

 [32mEXECUTED BLOCK AT 0:59:14[0m


In [15]:
print("Plot learning curve for dataset")

svc = SVC(probability=True)
title = "Learning curves for Supported Vector Machine"
warnings.filterwarnings("ignore")
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
plot_learning_curve(svc, title, x, y, cv=cv, ylim=(0.7, 1.01))

print_executed()


Plot learning curve for dataset



 [32mEXECUTED BLOCK AT 18:7:37[0m


In [20]:
print("Training classifier and saving models...")

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=rand.randint(1, 100))

svc = SVC(probability=True)
warnings.filterwarnings("ignore")
svc.fit(x_train, y_train)
accuracy = svc.score(x_test, y_test)
y_pred_proba = svc.predict_proba(x_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print(classification_report(y_test, y_pred))
print("overall accuracy:", np.round(accuracy, 4))
print(confusion_matrix(y_test, y_pred))
print(y_pred_proba)


# save models
now = datetime.datetime.now()
created = str(now.year)+str(now.month)+str(now.day)  # +str(now.hour)+str(now.minute)
joblib.dump(svc, './trained_models/SVC_'+created+'.pkl')
joblib.dump(scaler, './trained_models/scaler_'+created+'.pkl')


print_executed()


Training classifier and saving models
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1826
         1.0       1.00      1.00      1.00      1848

   micro avg       1.00      1.00      1.00      3674
   macro avg       1.00      1.00      1.00      3674
weighted avg       1.00      1.00      1.00      3674

overall accuracy: 0.9992
[[1826    0]
 [   2 1846]]
[[3.00000090e-14 1.00000000e+00]
 [9.99976825e-01 2.31754257e-05]
 [9.99412950e-01 5.87050409e-04]
 ...
 [3.00000090e-14 1.00000000e+00]
 [3.17807850e-07 9.99999682e-01]
 [1.36973522e-12 1.00000000e+00]]

 [32mEXECUTED BLOCK AT 1:1:37[0m


In [14]:
print("Test prediction of single ROI")

f1 = extractor.features(negative_images[rand.randint(0, len(negative_images))])
f2 = extractor.features(positive_images[rand.randint(0, len(positive_images))])
f3 = extractor.features(positive_images[rand.randint(0, len(positive_images))])
f4 = extractor.features(negative_images[rand.randint(0, len(negative_images))])

fA = scaler.transform([f1, f2, f3, f4])
print(svc.predict(fA))


f5 = extractor.features(negative_images[rand.randint(2000, total_nonsigns)])
f6 = extractor.features(positive_images[rand.randint(2000, total_signs)])
f7 = extractor.features(positive_images[rand.randint(2000, total_signs)])
f8 = extractor.features(negative_images[rand.randint(2000, total_nonsigns)])

fB = scaler.transform([f5, f6, f7, f8])
print(svc.predict_proba(fB))



Test prediction of single ROI


IndexError: list index out of range