In [83]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2

import glob
import time
import tqdm

from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import StandardScaler
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle as sk_shuffle

In [84]:
# Divide up into cars and notcars
images = glob.glob('*.jpeg')
data_set = []

images = glob.glob('*vehicles*/**/*png')
for image in images:
    label = 0
    if image.split('/')[0] == 'vehicles':
        label = 1
    data_set.append((image, label))
    
print("Total labels: ", len(data_set))
print("Total Veichles: ", len([x for x in data_set if x[1] == 1]))
print("Total Non- Veichles: ", len([x for x in data_set if x[1] == 0]))
print(data_set[0], data_set[9000])

Total labels:  17760
Total Veichles:  8792
Total Non- Veichles:  8968
('non-vehicles/GTI/image3419.png', 0) ('vehicles/GTI_Right/image0449.png', 1)


In [85]:
# Define a function to return HOG features and visualization
def get_hog_features(img, orient, pix_per_cell, cell_per_block, 
                        vis=False, feature_vec=True):
    # Call with two outputs if vis==True
    if vis == True:
        features, hog_image = hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
                                  cells_per_block=(cell_per_block, cell_per_block), block_norm= 'L2-Hys',
                                  transform_sqrt=True, 
                                  visualise=vis, feature_vector=feature_vec)
        return features, hog_image
    # Otherwise call with one output
    else:      
        features = hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
                       cells_per_block=(cell_per_block, cell_per_block), block_norm= 'L2-Hys',
                       transform_sqrt=True, 
                       visualise=vis, feature_vector=feature_vec)
        return features

In [86]:
# Define a function to extract features from a list of images
# Have this function call bin_spatial() and color_hist()
def extract_features(imgs, cspace='RGB', orient=9, 
                        pix_per_cell=8, cell_per_block=2, hog_channel=0):
    # Create a list to append feature vectors to
    features = []

    # Iterate through the list of images
    for file in tqdm.tqdm(imgs):
        # Read in each one by one
        image = mpimg.imread(file[0])

        # apply color conversion if other than 'RGB'
        if cspace != 'RGB':
            if cspace == 'HSV':
                feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
            elif cspace == 'LUV':
                feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2LUV)
            elif cspace == 'HLS':
                feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
            elif cspace == 'YUV':
                feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
            elif cspace == 'YCrCb':
                feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
        else: feature_image = np.copy(image)      

        # Call get_hog_features() with vis=False, feature_vec=True
        if hog_channel == 'ALL':
            hog_features = []
            for channel in range(feature_image.shape[2]):
                hog_features.append(get_hog_features(feature_image[:,:,channel], 
                                    orient, pix_per_cell, cell_per_block, 
                                    vis=False, feature_vec=True))
            hog_features = np.ravel(hog_features)        
        else:
            hog_features = get_hog_features(feature_image[:,:,hog_channel], orient, 
                        pix_per_cell, cell_per_block, vis=False, feature_vec=True)
        # Append the new feature vector to the features list
        features.append(hog_features)
    # Return list of feature vectors
    return features

In [87]:
def training_pipeline(data_set):
    
    cars = [x for x in data_set if x[1] == 1]
    not_cars = [x for x in data_set if x[1] == 0]
    
    orient = 9
    pix_per_cell=8
    cell_per_block=2
    hog_channel=0
    
    not_cars_features = extract_features(imgs=cars, orient=orient, pix_per_cell=pix_per_cell, cell_per_block=cell_per_block, hog_channel=hog_channel)
    car_features = extract_features(imgs=not_cars, orient=orient)
    
    # Create an array stack of feature vectors
    X = np.vstack((car_features, not_cars_features)).astype(np.float64)

    # Define the labels vector
    y = np.hstack((np.ones(len(car_features)), np.zeros(len(not_cars_features))))

    print(len(cars))
    # Split up data into randomized training and test sets
    rand_state = np.random.randint(0, 100)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=rand_state)
    
    # Fit a per-column scaler
    X_scaler = StandardScaler().fit(X_train)
    # Apply the scaler to X
    X_train = X_scaler.transform(X_train)
    X_test = X_scaler.transform(X_test)
    
    print('Using:',orient,'orientations',pix_per_cell,
    'pixels per cell and', cell_per_block,'cells per block')
    print('Feature vector length:', len(X_train[0]))
    
    return X_train, X_test, y_train, y_test


X_train, X_test, y_train, y_test = training_pipeline(data_set)

100%|██████████| 8792/8792 [00:46<00:00, 190.38it/s]
100%|██████████| 8968/8968 [00:47<00:00, 187.31it/s]


8792
Using: 9 orientations 8 pixels per cell and 2 cells per block
Feature vector length: 1764


In [None]:
def create_clf(X_train, X_test, y_train, y_test, parameters=None):
    # Use a linear SVC 
    svc = LinearSVC()
        
    clf = GridSearchCV(svc, param_grid=parameters, verbose=10)
    
    # Check the training time for the SVC
    t=time.time()
#     svc.fit(X_train, y_train)
    clf.fit(X_train, y_train)
    
    t2 = time.time()
    print(round(t2-t, 2), 'Seconds to train SVC...')
    
    svc = clf.best_estimator_
    
    # Check the score of the SVC
    print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
    
    # Check the prediction time for a single sample
    t=time.time()
    n_predict = 10
    print('My SVC predicts: ', svc.predict(X_test[0:n_predict]))
    print('For these',n_predict, 'labels: ', y_test[0:n_predict])
    t2 = time.time()
    print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')
    
    print(sorted(clf.cv_results_.keys()))
    sorted(clf.cv_results_.keys())
#     return svc
    return clf.best_estimator_

X_traint, y_traint = sk_shuffle(X_train, y_train, n_samples=250)
X_testt, y_testt = sk_shuffle(X_test, y_test, n_samples=250)

parameters = [{
              'loss':['hinge', 'squared_hinge'], 
              'C':[1, 10, 20],
              'multi_class': ['ovr', 'crammer_singer'],
             },
            ]

svc = create_clf(X_train, X_test, y_train, y_test, parameters=parameters)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] C=1, loss=hinge, multi_class=ovr ................................
[CV]  C=1, loss=hinge, multi_class=ovr, score=0.8990709459459459, total=  12.2s
[CV] C=1, loss=hinge, multi_class=ovr ................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.3s remaining:    0.0s


[CV]  C=1, loss=hinge, multi_class=ovr, score=0.8940033783783784, total=  12.1s
[CV] C=1, loss=hinge, multi_class=ovr ................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   24.4s remaining:    0.0s


[CV]  C=1, loss=hinge, multi_class=ovr, score=0.8980152027027027, total=  12.4s
[CV] C=1, loss=hinge, multi_class=crammer_singer .....................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   36.9s remaining:    0.0s


[CV]  C=1, loss=hinge, multi_class=crammer_singer, score=0.895481418918919, total= 6.8min
[CV] C=1, loss=hinge, multi_class=crammer_singer .....................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  7.5min remaining:    0.0s


In [None]:
print(svc)