In [27]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.externals import joblib
from hog_extractor import HOGExtractor
import random as rand
import numpy as np
import matplotlib.pyplot as plt
import cv2
import glob
from helpers import print_executed
from helpers import plot_learning_curve
from tqdm import tqdm
from matplotlib import pyplot as plt
import pandas as pd
import datetime
import warnings


hog_parameters = {
    'color_model': 'hsv',  # hls, hsv, yuv, ycrcb
    'svc_input_size': 64,  
    'number_of_orientations': 11,  # 6 - 12
    'pixels_per_cell': 16,  # 8, 16
    'cells_per_block': 2,  # 1, 2
    'do_transform_sqrt': True,
}


extractor = HOGExtractor(hog_parameters)
positive_images, negative_images = [], []
    
print_executed()



 [32mEXECUTED BLOCK AT 20:6:57[0m


In [28]:
# read file which describe positive dataset
positive_data_descr = pd.read_csv('dataset/positive/folders_classes.csv', delimiter=';', dtype=object)
positive_data_descr = positive_data_descr.drop('opis', axis=1)

# select only prohibitory signs which are speed limits
positive_data_descr = \
    positive_data_descr.loc[(positive_data_descr['typ'] == 'b') & (positive_data_descr['numer'] == '33')]
print(positive_data_descr, '\n')

# read file which describe types of signs and select its language
sign_description = pd.read_csv('dataset/sign_types.csv', delimiter=';')
sign_description = sign_description.drop('opis', axis=1)
print(sign_description.head(5), '\n')
print_executed()


  folder typ numer
0  00000   b    33
1  00001   b    33
2  00002   b    33
3  00003   b    33
4  00004   b    33
5  00005   b    33
7  00007   b    33
8  00008   b    33 

  typ    description
1   b    prohibitory
2   c      mandatory
3   d  informational 


 [32mEXECUTED BLOCK AT 20:7:3[0m


In [29]:
print("Loading negative images to memory...")

negative_paths = glob.glob('./dataset/negative/*/*.png')
for path in tqdm(negative_paths[:10000]):
    image = cv2.imread(path)
    if image.shape[0] >= 30 or image.shape[1] >= 30:
        negative_images.append(image)

# good_width, good_height, _ = negative_images[0].shape # zastanowić się czy to jest potrzebne!!!

print_executed()


  0%|          | 0/3000 [00:00<?, ?it/s] 13%|█▎        | 391/3000 [00:00<00:00, 3906.06it/s] 25%|██▌       | 764/3000 [00:00<00:00, 3850.17it/s] 36%|███▌      | 1083/3000 [00:00<00:00, 3621.28it/s] 47%|████▋     | 1411/3000 [00:00<00:00, 3510.74it/s] 56%|█████▌    | 1672/3000 [00:00<00:00, 3142.57it/s] 65%|██████▍   | 1947/3000 [00:00<00:00, 3013.42it/s] 74%|███████▎  | 2210/3000 [00:00<00:00, 2515.40it/s] 82%|████████▏ | 2449/3000 [00:00<00:00, 2332.62it/s] 92%|█████████▏| 2749/3000 [00:00<00:00, 2499.14it/s]100%|██████████| 3000/3000 [00:01<00:00, 2855.06it/s]


Loading negative images to memory...

 [32mEXECUTED BLOCK AT 20:7:8[0m


In [30]:
print("Loading positive images to memory...\n")

# for each selected folder
for folder in positive_data_descr['folder']:
    # read all images from it and sort it
    paths = sorted(glob.glob('./dataset/positive/' + folder + '/*.ppm'))
    # read csv which contains bounding boxes of traffic sign
    csv = pd.read_csv('./dataset/positive/' + folder + '/GT-' + folder + '.csv', delimiter=';')
    print("\nReading folder: ", folder, '\n')
    for path in tqdm(paths):
        # read name of the image
        name = path.split('/')[4]
        # search for selected image in the csv and flatten that row to list
        row = (csv.loc[csv['Filename'] == name]).values.tolist()
        # read image and cut ROI with traffic sign, then add to list
        image = cv2.imread(path)
        if image.shape[0] >= 35 or image.shape[1] >= 35:
            image = image[row[0][3]:row[0][5], row[0][4]:row[0][6]]
            #print(image.shape)
            positive_images.append(image)

print_executed()


  0%|          | 0/210 [00:00<?, ?it/s] 21%|██▏       | 45/210 [00:00<00:00, 446.96it/s] 43%|████▎     | 90/210 [00:00<00:00, 445.43it/s] 66%|██████▌   | 138/210 [00:00<00:00, 452.59it/s] 87%|████████▋ | 183/210 [00:00<00:00, 450.54it/s]100%|██████████| 210/210 [00:00<00:00, 455.45it/s]
  0%|          | 0/2220 [00:00<?, ?it/s]  1%|▏         | 28/2220 [00:00<00:07, 275.34it/s]  2%|▏         | 51/2220 [00:00<00:08, 253.80it/s]  3%|▎         | 74/2220 [00:00<00:08, 244.91it/s]  5%|▍         | 106/2220 [00:00<00:08, 262.26it/s]  6%|▌         | 138/2220 [00:00<00:07, 274.95it/s]  8%|▊         | 168/2220 [00:00<00:07, 280.27it/s]  9%|▊         | 194/2220 [00:00<00:07, 254.59it/s] 10%|▉         | 218/2220 [00:00<00:08, 235.26it/s] 12%|█▏        | 259/2220 [00:00<00:07, 269.26it/s] 13%|█▎        | 292/2220 [00:01<00:06, 283.52it/s] 15%|█▍        | 322/2220 [00:01<00:06, 288.16it/s] 16%|█▌        | 355/2220 [00:01<00:06, 298.61it/s] 18%|█▊        | 399/2220 [00:01<00:05, 330

Loading positive images to memory...


Reading folder:  00000 


Reading folder:  00001 


Reading folder:  00002 


Reading folder:  00003 


Reading folder:  00004 


Reading folder:  00005 


Reading folder:  00007 


Reading folder:  00008 


 [32mEXECUTED BLOCK AT 20:7:50[0m


In [31]:
# stack already read images to np array
# shapes of arrays can be different, because we have stack images with different shapes to one array
positive_images, negative_images = np.asarray(positive_images), np.asarray(negative_images)
total_signs, total_nonsigns = positive_images.shape[0], negative_images.shape[0]

print("Positive images shape: ", len(positive_images), "type: ", type(positive_images))
print("Negative images shape: ", len(negative_images), "type: ", type(negative_images))
print("Total positive signs: ", total_signs, "\nTotal nonsigns: ", total_nonsigns)

positive_features, negative_features = [], []
print_executed()


Positive images shape:  9183 type:  <class 'numpy.ndarray'>
Negative images shape:  3000 type:  <class 'numpy.ndarray'>
Total positive signs:  9183 
Total nonsigns:  3000

 [32mEXECUTED BLOCK AT 20:8:5[0m


In [32]:
print("Extracting features from traffic signs...")

for img in tqdm(positive_images):
    positive_features.append(extractor.features(img)) 

print("Extracting features from non traffic signs...")
for img in tqdm(negative_images):
    negative_features.append(extractor.features(img)) 

positive_features = np.asarray(positive_features)
negative_features = np.asarray(negative_features)

print_executed()


  0%|          | 0/9183 [00:00<?, ?it/s]  0%|          | 6/9183 [00:00<02:38, 57.90it/s]  0%|          | 12/9183 [00:00<02:39, 57.32it/s]  0%|          | 19/9183 [00:00<02:33, 59.85it/s]  0%|          | 26/9183 [00:00<02:29, 61.22it/s]  0%|          | 33/9183 [00:00<02:24, 63.22it/s]  0%|          | 41/9183 [00:00<02:18, 66.02it/s]  1%|          | 47/9183 [00:00<02:24, 63.19it/s]  1%|          | 53/9183 [00:00<02:28, 61.31it/s]  1%|          | 60/9183 [00:00<02:26, 62.40it/s]  1%|          | 67/9183 [00:01<02:32, 59.89it/s]  1%|          | 73/9183 [00:01<02:32, 59.65it/s]  1%|          | 80/9183 [00:01<02:29, 61.01it/s]  1%|          | 87/9183 [00:01<02:36, 57.97it/s]  1%|          | 94/9183 [00:01<02:32, 59.48it/s]  1%|          | 100/9183 [00:01<02:36, 57.86it/s]  1%|          | 106/9183 [00:01<02:38, 57.30it/s]  1%|          | 112/9183 [00:01<02:37, 57.74it/s]  1%|▏         | 119/9183 [00:01<02:33, 59.06it/s]  1%|▏         | 125/9183 [00:02<02:45, 54.68it/s]  1%

Extracting features from traffic signs...
Extracting features from non traffic signs...

 [32mEXECUTED BLOCK AT 20:12:21[0m


In [33]:
print("Scaling features...")
print(len(positive_features), len(positive_features[0]))
print(len(negative_features), len(negative_features[0]))

unscaled_x = np.vstack((positive_features, negative_features)).astype(np.float64)
scaler = StandardScaler().fit(unscaled_x)
x = scaler.transform(unscaled_x)
y = np.hstack((np.ones(total_signs), np.zeros(total_nonsigns)))

print(" x shape: ", x.shape, " y shape: ", y.shape)

print_executed()


Scaling features...
9183 1188
3000 1188
 x shape:  (12183, 1188)  y shape:  (12183,)

 [32mEXECUTED BLOCK AT 20:12:52[0m


In [15]:
print("Plot learning curve for dataset")

svc = SVC(probability=True)
title = "Learning curves for Supported Vector Machine"
warnings.filterwarnings("ignore")
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
plot_learning_curve(svc, title, x, y, cv=cv, ylim=(0.7, 1.01))

print_executed()


Plot learning curve for dataset



 [32mEXECUTED BLOCK AT 18:7:37[0m


In [34]:
print("Training classifier and saving models")

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=rand.randint(1, 100))

svc = SVC(probability=True)
warnings.filterwarnings("ignore")
svc.fit(x_train, y_train)
accuracy = svc.score(x_test, y_test)
y_pred_proba = svc.predict_proba(x_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print(classification_report(y_test, y_pred))
print("overall accuracy:", np.round(accuracy, 4))
print(confusion_matrix(y_test, y_pred))
print(y_pred_proba)


# save models
now = datetime.datetime.now()
created = str(now.year)+str(now.month)+str(now.day)  # +str(now.hour)+str(now.minute)
joblib.dump(svc, './trained_models/SVC_'+created+'.pkl')
joblib.dump(scaler, './trained_models/scaler_'+created+'.pkl')


print_executed()


Training classifier and saving models
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       602
         1.0       1.00      1.00      1.00      1835

   micro avg       1.00      1.00      1.00      2437
   macro avg       1.00      1.00      1.00      2437
weighted avg       1.00      1.00      1.00      2437

overall accuracy: 0.9979
[[ 601    1]
 [   2 1833]]
[[3.37716793e-11 1.00000000e+00]
 [3.00000090e-14 1.00000000e+00]
 [3.00000090e-14 1.00000000e+00]
 ...
 [2.50127494e-10 1.00000000e+00]
 [9.99851603e-01 1.48396759e-04]
 [3.00000090e-14 1.00000000e+00]]

 [32mEXECUTED BLOCK AT 20:14:40[0m




In [35]:
print("Test prediction of single ROI")

f1 = extractor.features(negative_images[rand.randint(0, 2000)])
f2 = extractor.features(positive_images[rand.randint(0, 2000)])
f3 = extractor.features(positive_images[rand.randint(0, 2000)])
f4 = extractor.features(negative_images[rand.randint(0, 2000)])

fA = scaler.transform([f1, f2, f3, f4])
print(svc.predict(fA))


f5 = extractor.features(negative_images[rand.randint(2000, total_nonsigns)])
f6 = extractor.features(positive_images[rand.randint(2000, total_signs)])
f7 = extractor.features(positive_images[rand.randint(2000, total_signs)])
f8 = extractor.features(negative_images[rand.randint(2000, total_nonsigns)])

fB = scaler.transform([f5, f6, f7, f8])
print(svc.predict(fB))



Test prediction of single ROI
[0. 1. 1. 0.]
[0. 1. 1. 0.]
