In [3]:
import numpy as np
import cv2
import glob
import time
import pickle

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from skimage.feature import hog

import utils

## Load the Datasets

In [4]:
cars_ds_file = 'cars_data.p'
with open(cars_ds_file, mode='rb') as f:
    ds = pickle.load(f)
cars_train = ds['cars_train']
notcars_train = ds['notcars_train']
cars_val = ds['cars_val']
notcars_val = ds['notcars_val']
cars_test = ds['cars_test']
notcars_test = ds['notcars_test']

print("dataset loaded")

dataset loaded


## Extract Histogram features

In [5]:
color_space = 'HLS' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
spatial_size = (16, 16)
hist_bins = 32
orient = 9
pix_per_cell = 8
cell_per_block = 2
hog_channel = 'ALL'
spatial_features = True
hist_features = True
hog_features = True

t=time.time()
print('Starting extract Car Features')
cars_train_features = utils.extract_features_bulk(cars_train,color_space, spatial_size,hist_bins, orient, 
                               pix_per_cell, cell_per_block, hog_channel, spatial_features, hist_features, hog_features)
cars_val_features = utils.extract_features_bulk(cars_val,color_space, spatial_size,hist_bins, orient, 
                               pix_per_cell, cell_per_block, hog_channel, spatial_features, hist_features, hog_features)
cars_test_features = utils.extract_features_bulk(cars_test,color_space, spatial_size,hist_bins, orient, 
                               pix_per_cell, cell_per_block, hog_channel, spatial_features, hist_features, hog_features)

print('Starting extract Not Car Features')
notcars_train_features = utils.extract_features_bulk(notcars_train,color_space, spatial_size,hist_bins, orient, 
                               pix_per_cell, cell_per_block, hog_channel, spatial_features, hist_features, hog_features)
notcars_val_features = utils.extract_features_bulk(notcars_val,color_space, spatial_size,hist_bins, orient, 
                               pix_per_cell, cell_per_block, hog_channel, spatial_features, hist_features, hog_features)
notcars_test_features = utils.extract_features_bulk(notcars_test,color_space, spatial_size,hist_bins, orient, 
                               pix_per_cell, cell_per_block, hog_channel, spatial_features, hist_features, hog_features)
t2 = time.time()
print('Features Extracted in: {}s'.format(round(t2-t, 2)))

Starting extract Car Features
Starting extract Not Car Features
Features Extracted in: 91.2s


## Normalize the features

In [6]:
# Create an array stack of feature vectors
X = np.vstack((cars_train_features, cars_val_features, cars_test_features, 
               notcars_train_features, notcars_val_features, notcars_test_features)).astype(np.float64)
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)

In [7]:
print(scaled_X)

[[ 1.20444453 -0.50679381 -0.53345294 ..., -0.11410046 -0.32320719
   0.81947684]
 [-0.87703451  1.20109641 -0.62255031 ..., -0.70628552 -0.29875005
   0.17515005]
 [-0.32630026 -0.96528784 -0.40832241 ..., -0.89872948 -0.81191714
  -0.66705859]
 ..., 
 [ 1.51984988 -0.97904265  0.17590576 ..., -0.93618384 -0.97603736
  -0.84128332]
 [-0.29675449 -0.37612307 -0.46834722 ...,  0.22227731  1.68476253
   0.09464588]
 [ 0.47215278  1.19421883 -0.2478553  ..., -0.62191524  0.68598843
  -0.50925073]]


In [8]:
# Split back the training, validation and test features for Cars
n_cars_train_features = len(cars_train_features)
n_cars_val_features = len(cars_val_features)
n_cars_test_features = len(cars_test_features)

cars_train_features = scaled_X[:n_cars_train_features]
cars_val_features = scaled_X[n_cars_train_features:n_cars_train_features+n_cars_val_features]
cars_test_features = scaled_X[n_cars_train_features+n_cars_val_features:n_cars_train_features+n_cars_val_features+n_cars_test_features]

print('{} = {}'.format(n_cars_train_features, len(cars_train_features)))
print('{} = {}'.format(n_cars_val_features, len(cars_val_features)))
print('{} = {}'.format(n_cars_test_features, len(cars_test_features)))

6154 = 6154
1758 = 1758
880 = 880


In [9]:
# Split back the training, validation and test features for NotCars
n_notcars_train_features = len(notcars_train_features)
n_notcars_val_features = len(notcars_val_features)
n_notcars_test_features = len(notcars_test_features)

notcars_train_features = scaled_X[:n_notcars_train_features]
notcars_val_features = scaled_X[n_notcars_train_features:n_notcars_train_features+n_notcars_val_features]
notcars_test_features = scaled_X[n_notcars_train_features+n_notcars_val_features:n_notcars_train_features+n_notcars_val_features+n_notcars_test_features]

print('{} = {}'.format(n_notcars_train_features, len(notcars_train_features)))
print('{} = {}'.format(n_notcars_val_features, len(notcars_val_features)))
print('{} = {}'.format(n_notcars_test_features, len(notcars_test_features)))

6277 = 6277
1794 = 1794
897 = 897


## Create the arrays for the features and labels to be trained, validate and tested

In [10]:
# ## Create the labels vector, 1 if car, 0 if not car
y_train = np.hstack((np.ones(n_cars_train_features), np.zeros(n_notcars_train_features)))
y_val = np.hstack((np.ones(n_cars_val_features), np.zeros(n_notcars_val_features)))
y_test = np.hstack((np.ones(n_cars_test_features), np.zeros(n_notcars_test_features)))

# # Create an array stack of feature vectors

_s1 = n_cars_train_features
_s2 = _s1 + n_cars_val_features
_s3 = _s2 + n_cars_test_features
_s4 = _s3 + n_notcars_train_features
_s5 = _s4 + n_notcars_val_features


X_train = np.vstack((scaled_X[:_s1],scaled_X[_s3:_s4]))
X_val = np.vstack((scaled_X[_s1:_s2],scaled_X[_s4:_s5]))
X_test = np.vstack((scaled_X[_s2:_s3],scaled_X[_s5:]))

X_train,y_train = shuffle(X_train,y_train,random_state=42)
X_val,y_val = shuffle(X_val,y_val,random_state=42)
X_test,y_test = shuffle(X_test,y_test,random_state=42)

print('{} = {}'.format(len(X_train), len(y_train)))
print('{} = {}'.format(len(X_val), len(y_val)))
print('{} = {}'.format(len(X_test), len(y_test)))

12431 = 12431
3552 = 3552
1777 = 1777


## Build the Model, tunning, test accuracy and predict

In [11]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

C_range = np.logspace(-2, 1, 10)
param_grid = dict(C=C_range)
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(LinearSVC(), param_grid=param_grid, cv=cv)
grid.fit(X_val, y_val)

print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'C': 0.01} with a score of 0.99


In [19]:
print('Using:',orient,'orientations',pix_per_cell,
    'pixels per cell and', cell_per_block,'cells per block')
print('Feature vector length:', len(X_train[0]))
# Use a linear SVC
svc = LinearSVC(C=0.01)
# Check the training time for the SVC
t=time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Validation Accuracy of SVC = ', round(svc.score(X_val, y_val), 4))
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a single sample
t=time.time()
n_predict = 500
t2 = time.time()
print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')

Using: 9 orientations 8 pixels per cell and 2 cells per block
Feature vector length: 6156
10.94 Seconds to train SVC...
Validation Accuracy of SVC =  0.9901
Test Accuracy of SVC =  0.9859
0.0 Seconds to predict 500 labels with SVC


## Saving the data for Part 3

In [20]:
pickle_file = 'preprocessed_dataset.p'
print('Saving pre processed...')
try:
    with open(pickle_file, 'wb') as pfile:
        pickle.dump(
            {                    
                'X_train': X_train,
                'X_val': X_val,
                'X_test': X_test,
                'y_train': y_train,
                'y_val': y_val,
                'y_test': y_test,
                
            },
            pfile, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise
    
print('preprocessed_dataset.p saved.')


pickle_file = 'classifier_data.p'
print('Saving classifier file...')
try:
    with open(pickle_file, 'wb') as pfile:
        pickle.dump(
            {   'svc':svc, 
                'X_scaler': X_scaler,
                'color_space': color_space,
                'spatial_size': spatial_size,
                'hist_bins': hist_bins,
                'orient': orient,
                'pix_per_cell': pix_per_cell,
                'cell_per_block': cell_per_block,
                'hog_channel': hog_channel,
                'spatial_features': spatial_features,
                'hist_features': hist_features,
                'hog_features':hog_features
            },
            pfile, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise

print('classifier_data.p saved.')

Saving pre processed...
preprocessed_dataset.p saved.
Saving classifier file...
classifier_data.p saved.
