# Self-Driving Car Engineering
## Vehicle Detection and Tracking
A pipeline to detect and track cars in a video stream from an autonomous car's forward-facing camera.

## Step 1: Build and train a classifier to detect cars in an image

In [1]:
import glob
import matplotlib.image as mpimg
import numpy as np
import cv2
from skimage.feature import hog

# Extract the image names
# (each pattern only matches PNG files directly inside the given directory)
cars = glob.glob("./vehicles/*.png")
notcars = glob.glob("./non-vehicles/*.png")

# define a few parameters for HOG feature extraction
# (passed to extract_features() for training; orient, pix_per_cell,
# cell_per_block and hog_channel are reused by tracking_pipeline())
colorspace = 'RGB' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
orient = 9 # forwarded to hog() as `orientations`
pix_per_cell = 8 # cells are pix_per_cell x pix_per_cell pixels
cell_per_block = 2 # blocks are cell_per_block x cell_per_block cells
hog_channel = 0 # Can be 0, 1, 2, or "ALL"

#### Define a few functions to extract HOG features from a list of images

In [2]:
# Thin wrapper around skimage's hog() with this notebook's fixed settings
def get_hog_features(img, orient, pix_per_cell, cell_per_block, 
                        vis=False, feature_vec=True):
    """Run hog() on a single-channel image.

    orient, pix_per_cell and cell_per_block are forwarded as `orientations`,
    square `pixels_per_cell` and square `cells_per_block`; block_norm is
    always 'L2-Hys' and transform_sqrt is always True.

    Returns the HOG features, or the pair (features, hog_image) when
    vis is True.
    """
    cell_shape = (pix_per_cell, pix_per_cell)
    block_shape = (cell_per_block, cell_per_block)

    # A single call serves both modes; only the shape of the result differs
    hog_result = hog(img, orientations=orient, pixels_per_cell=cell_shape,
                     cells_per_block=block_shape, block_norm='L2-Hys',
                     transform_sqrt=True,
                     visualise=vis, feature_vector=feature_vec)

    if vis == True:
        # Two outputs: the feature data and the visualization image
        features, hog_image = hog_result
        return features, hog_image

    # One output: the feature data only
    return hog_result

# Define a function to extract HOG features from a list of image files
# (only HOG features are computed; no spatial or color-histogram features)
def extract_features(imgs, cspace='RGB', orient=9, 
                        pix_per_cell=8, cell_per_block=2, hog_channel=0):
    """Compute one HOG feature vector per image file.

    Parameters
    ----------
    imgs : iterable of str
        Image file paths; each one is read with mpimg.imread().
    cspace : str
        Color space the HOG is computed in: 'RGB' (no conversion), 'HSV',
        'LUV', 'HLS', 'YUV' or 'YCrCb'. The loaded image is treated as RGB.
    orient, pix_per_cell, cell_per_block : int
        Forwarded unchanged to get_hog_features().
    hog_channel : int or 'ALL'
        Index of the channel to use, or 'ALL' to concatenate the HOG
        features of every channel.

    Returns
    -------
    list
        One feature vector per entry of `imgs`, in the same order.

    Raises
    ------
    ValueError
        If `cspace` is not one of the supported names.
    """
    # NOTE(review): matplotlib presumably loads PNGs as floats in [0, 1],
    # whereas video frames are usually uint8 in [0, 255] - confirm that
    # the detection pipeline feeds data on a comparable scale.
    features = []
    for file in imgs:
        # Read in each one by one
        image = mpimg.imread(file)

        # Apply the color conversion; an unknown name is an explicit error
        # instead of leaving feature_image undefined
        if cspace == 'RGB':
            feature_image = np.copy(image)
        elif cspace == 'HSV':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        elif cspace == 'LUV':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2LUV)
        elif cspace == 'HLS':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
        elif cspace == 'YUV':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
        elif cspace == 'YCrCb':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
        else:
            raise ValueError("Unsupported color space: %r" % (cspace,))

        if hog_channel == 'ALL':
            # One HOG vector per channel, flattened into a single vector
            per_channel = [get_hog_features(feature_image[:,:,channel],
                                            orient, pix_per_cell, cell_per_block,
                                            vis=False, feature_vec=True)
                           for channel in range(feature_image.shape[2])]
            hog_features = np.ravel(per_channel)
        else:
            hog_features = get_hog_features(feature_image[:,:,hog_channel], orient,
                                            pix_per_cell, cell_per_block,
                                            vis=False, feature_vec=True)

        # Append the new feature vector to the features list
        features.append(hog_features)

    # Return list of feature vectors
    return features


#### Extract all car and non-car features into their respective lists

In [3]:
import time

# Extract HOG features for both classes with identical settings and time it
hog_kwargs = dict(cspace=colorspace, orient=orient,
                  pix_per_cell=pix_per_cell, cell_per_block=cell_per_block,
                  hog_channel=hog_channel)

t = time.time()
car_features = extract_features(cars, **hog_kwargs)
notcar_features = extract_features(notcars, **hog_kwargs)
t2 = time.time()

print(round(t2-t, 2), 'Seconds to extract HOG features...')

30.97 Seconds to extract HOG features...


#### Set up to train the classifier

In [4]:
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Create an array stack of feature vectors (car rows first, then non-car rows)
X = np.vstack((car_features, notcar_features)).astype(np.float64)

# Define the labels vector: 1 for every car sample, 0 for every non-car sample
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

# Split the data into training (80%) and test (20%) sets.
# A fixed seed makes the split - and every score derived from it -
# identical after a kernel restart; a randomly drawn seed would give a
# different test set on every run.
rand_state = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=rand_state)
    
# Fit a per-column scaler on the training split only
scaler = StandardScaler().fit(X_train)
# Apply that same scaler to both splits
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print('Using:',orient,'orientations',pix_per_cell,
    'pixels per cell and', cell_per_block,'cells per block')
print('Feature vector length:', len(X_train[0]))

Using: 9 orientations 8 pixels per cell and 2 cells per block
Feature vector length: 1764


#### Use Grid Search to automatically fit the classifier with the best parameters

The best parameters found through Grid Search were the RBF kernel with the penalty parameter (C) set to 35 and the kernel coefficient (gamma) set to automatic (1/n_features).

In [5]:
from sklearn.model_selection import GridSearchCV

# Start the training timer
t=time.time()

# Train a single SVC with fixed hyper-parameters (gamma is not passed, so the
# SVC default is used). The commented-out lines are what remains of the
# earlier grid-search run.
# parameters = [{'C': [10], 'kernel': ['linear']}]
classifier = svm.SVC(C=35, kernel='rbf')

# Fit on the scaled training split

# classifier = GridSearchCV(svc, parameters, verbose=10)
classifier.fit(X_train, y_train)

print(round(time.time()-t, 2), 'seconds to train...')

# Check the score of the SVC on the held-out test split
print('Test Accuracy of SVC = ', round(classifier.score(X_test, y_test), 4))
# (only valid when `classifier` is a GridSearchCV object)
# print('The optimal parameters determined by Grid Search are: ', classifier.best_params_)

69.01 seconds to train...
Test Accuracy of SVC =  0.9527


#### Save the trained model
Now that the model is working well (about 95% test accuracy in the run above), the save call is commented out so that I don't accidentally overwrite my saved results.

In [6]:
# Prefer the standalone joblib package: the copy vendored inside scikit-learn
# (sklearn.externals.joblib) is no longer shipped with newer releases.
# Fall back to the vendored copy for older environments.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Saving is disabled on purpose so an existing model file is not overwritten.
# NOTE(review): only the classifier would be saved here; the fitted `scaler`
# is not persisted alongside it - confirm whether it should be.
# joblib.dump(classifier, 'svm_model.pkl') 

#### Restore the trained model

In [7]:
# Load the previously saved model; this replaces the `classifier` fitted above.
# NOTE(review): joblib.load unpickles the file, so only load model files
# from a trusted source.
classifier = joblib.load('svm_model.pkl')

#### See how well our model generalizes and where it has trouble by generating the confusion matrix

In [8]:
from sklearn.metrics import confusion_matrix

# Classify every sample of the test split
y_pred = classifier.predict(X_test)

# Flatten the confusion matrix into its four counts and report them
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
print("True negatives: ", tn,
      "\nFalse positives: ", fp,
      "\nFalse negatives: ", fn,
      "\nTrue positives: ", tp)

True negatives:  1744 
False positives:  35 
False negatives:  31 
True positives:  1742


We can see from the confusion matrix that the model performs in a very balanced manner: the frequency of falsely identifying a car and the frequency of falsely predicting the absence of a car are about equal.

#### Detection and tracking pipeline
Now that we have a trained classifier, it's time to build a pipeline that accepts images from a video stream, detects cars, and tracks them in subsequent frames. We will define the functions needed for these separate steps one at a time. First, we define a function to extract HOG features from an image in the same way that we did to train the classifier. The only difference is that this function extracts from a single image rather than the entire dataset.

In [9]:
# Function to extract features from a single image window
# This function is very similar to extract_features()
# just for a single image rather than list of images
def single_img_features(img, color_space='RGB', spatial_size=(32, 32),
                        hist_bins=32, orient=9, 
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Compute the HOG feature vector of one image window.

    Parameters
    ----------
    img : array
        Image window, treated as RGB; indexed as img[:, :, channel].
    color_space : str
        'RGB' (no conversion), 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'.
    spatial_size, hist_bins, spatial_feat, hist_feat
        Accepted for interface compatibility but ignored: spatial binning
        and color histogram features were not used for training, so they
        are not computed here either.
    orient, pix_per_cell, cell_per_block : int
        Forwarded unchanged to get_hog_features().
    hog_channel : int or 'ALL'
        Index of the channel to use, or 'ALL' to concatenate the HOG
        features of every channel.
    hog_feat : bool
        Whether to compute HOG features; must be True because it is the
        only feature type produced.

    Returns
    -------
    numpy.ndarray
        1-D concatenated feature vector.

    Raises
    ------
    ValueError
        If `color_space` is not supported, or if no feature type is
        enabled (hog_feat is False).
    """
    # Apply the color conversion; an unknown name is an explicit error
    # instead of leaving feature_image undefined
    if color_space == 'RGB':
        feature_image = np.copy(img)
    elif color_space == 'HSV':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    elif color_space == 'LUV':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
    elif color_space == 'HLS':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    elif color_space == 'YUV':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    elif color_space == 'YCrCb':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
    else:
        raise ValueError("Unsupported color space: %r" % (color_space,))

    # Define an empty list to receive features
    img_features = []

    # Compute HOG features if flag is set
    if hog_feat == True:
        if hog_channel == 'ALL':
            # Same layout as extract_features(): per-channel vectors
            # flattened into one vector
            hog_features = np.ravel(
                [get_hog_features(feature_image[:,:,channel],
                                  orient, pix_per_cell, cell_per_block,
                                  vis=False, feature_vec=True)
                 for channel in range(feature_image.shape[2])])
        else:
            hog_features = get_hog_features(feature_image[:,:,hog_channel], orient,
                                            pix_per_cell, cell_per_block,
                                            vis=False, feature_vec=True)
        # Append for BOTH branches; previously the 'ALL' result was never
        # added, so np.concatenate() received an empty list
        img_features.append(hog_features)

    if not img_features:
        raise ValueError("No features requested: hog_feat must be True")

    # Return concatenated array of features
    return np.concatenate(img_features)

Then define a function that, given an image, breaks the image up into windows to be feature extracted and classified:

In [10]:
# Define a function that takes an image,
# start and stop positions in both x and y, 
# window size (x and y dimensions),  
# and overlap fraction (for both x and y)
def slide_window(img, x_start_stop=[None, None], y_start_stop=[None, None], 
                    xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """List the sliding-window boxes covering a region of an image.

    Parameters
    ----------
    img : array
        Only img.shape[0] (height) and img.shape[1] (width) are read.
    x_start_stop, y_start_stop : sequence of two numbers or None
        Search range along each axis; None means 0 for the start and the
        image size for the stop. The sequences are never modified.
    xy_window : (int, int)
        Window width and height.
    xy_overlap : (float, float)
        Fraction of overlap between neighbouring windows in x and y.

    Returns
    -------
    list
        Boxes as ((startx, starty), (endx, endy)) tuples of Python ints,
        ordered row by row.

    Raises
    ------
    ValueError
        If the window size and overlap give a step below one pixel.
    """
    # Resolve the bounds into locals. Writing them back into the argument
    # lists would alter the shared default lists (and the caller's lists),
    # so a later call on a different image size would reuse stale bounds.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Compute the span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Compute the number of pixels per step in x/y
    # (built-in int() replaces the removed np.int alias)
    nx_pix_per_step = int(xy_window[0]*(1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1]*(1 - xy_overlap[1]))
    if nx_pix_per_step < 1 or ny_pix_per_step < 1:
        raise ValueError("xy_window and xy_overlap must give a step of at "
                         "least one pixel")

    # Compute the number of windows in x/y
    nx_buffer = int(xy_window[0]*(xy_overlap[0]))
    ny_buffer = int(xy_window[1]*(xy_overlap[1]))
    nx_windows = int((xspan-nx_buffer)/nx_pix_per_step)
    ny_windows = int((yspan-ny_buffer)/ny_pix_per_step)

    # Loop through finding x and y window positions
    # Note: we could vectorize this step, but in practice
    # we'll be considering windows one by one with the
    # classifier, so looping makes sense
    window_list = []
    for ys in range(ny_windows):
        starty = ys*ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs*nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            # Bounds may be floats; store plain ints so they can be used
            # as slice indices
            window_list.append(((int(startx), int(starty)), (int(endx), int(endy))))

    return window_list

Then we need a function to search the windows returned from slide_window for cars. This function will use the feature extraction function we defined earlier and run the extracted features through the classifier.

In [22]:
# Define a function that we will pass an image 
# and the list of windows to be searched (output of slide_window())

import sys


def search_windows(img, windows, classifier, scaler, color_space='RGB', 
                    spatial_size=(32, 32), hist_bins=32, 
                    hist_range=(0, 256), orient=9, 
                    pix_per_cell=8, cell_per_block=2, 
                    hog_channel=0, spatial_feat=True, 
                    hist_feat=True, hog_feat=True):
    """Return the windows the classifier labels as 1.

    Each window, given as ((x1, y1), (x2, y2)), is cropped from `img`,
    resized to 64x64, turned into a feature vector by
    single_img_features(), scaled with `scaler` and passed to
    `classifier`. Windows whose prediction equals 1 are returned in their
    original order. `hist_range` is accepted but not used.
    """
    # Settings handed unchanged to the feature extractor for every window
    feature_kwargs = dict(color_space=color_space,
                          spatial_size=spatial_size, hist_bins=hist_bins,
                          orient=orient, pix_per_cell=pix_per_cell,
                          cell_per_block=cell_per_block,
                          hog_channel=hog_channel, spatial_feat=spatial_feat,
                          hist_feat=hist_feat, hog_feat=hog_feat)

    # to hold positive detection windows
    on_windows = []

    for window in windows:
        # Crop the window from the original image and bring it to 64x64
        top, bottom = int(window[0][1]), int(window[1][1])
        left, right = int(window[0][0]), int(window[1][0])
        patch = cv2.resize(img[top:bottom, left:right, :], (64, 64))

        # Feature vector -> single-row matrix -> scaled features
        features = single_img_features(patch, **feature_kwargs)
        test_features = scaler.transform(np.array(features).reshape(1, -1))

        # Keep the window when the classifier predicts 1
        if classifier.predict(test_features) == 1:
            on_windows.append(window)

    return on_windows

Since we will allow some overlap in the sliding windows function, the above series of functions will almost certainly return multiple detections on the same car. Also, since our classifier isn't perfect, we will naturally get some false positives. Both of these need to be filtered out. The method to solve both at the same time is by using a heatmap. Starting from an initially black image (all pixels set to 0), the pixels within the detection window are incremented for every positive detection. Afterwards, any pixel at or below a given threshold is set back to 0. As a result, only the locations with multiple detections will show a car.

In [23]:
def heat_threshold(heatmap, bbox_list, threshold):
    """Accumulate detections into `heatmap` and suppress weak responses.

    Every box in `bbox_list`, given as ((x1, y1), (x2, y2)), adds 1 to the
    pixels it covers. Pixels whose count is at or below `threshold` are
    then reset to 0. `heatmap` is modified in place and also returned.
    """
    for box in bbox_list:
        upper_left, lower_right = box[0], box[1]
        # Rows are indexed by y and columns by x
        heatmap[upper_left[1]:lower_right[1], upper_left[0]:lower_right[0]] += 1

    # Zero out pixels at or below the threshold
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0

    return heatmap

Finally, we need a function to draw bounding boxes around the detected vehicles. The input to this function is simply the heatmap and the original image. We will use a scipy function to label the distinct objects in the heatmap, and then draw boxes around those labelled locations onto the original image

In [24]:
from scipy.ndimage.measurements import label

def draw_labeled_bboxes(img, heatmap):
    """Draw one rectangle per connected region of `heatmap` onto `img`.

    Regions are found with label(); for each one the tightest
    axis-aligned box around its pixels is drawn with color (0,0,255) and
    thickness 6. The drawing is done directly on `img`, which is also
    returned.
    """
    labelled = label(heatmap)
    label_map, region_count = labelled[0], labelled[1]

    # Label values run from 1 to region_count
    for car_number in range(1, region_count + 1):
        # Row (y) and column (x) indices of the pixels of this region
        hits = np.nonzero(label_map == car_number)
        rows = np.array(hits[0])
        cols = np.array(hits[1])

        # Tightest box around those pixels, as (x, y) corner points
        upper_left = (np.min(cols), np.min(rows))
        lower_right = (np.max(cols), np.max(rows))

        cv2.rectangle(img, upper_left, lower_right, (0,0,255), 6)

    return img

Now that we have the entire functionality of the pipeline created, we need to put it all together into one function that the moviepy editor can use to edit video frames.

In [27]:
def tracking_pipeline(image):
    """Detect cars in one video frame and draw boxes around them.

    The lower half of the frame is scanned with 96x96 windows at 50%
    overlap. Each window is classified with the notebook-level
    `classifier` and `scaler`, using the same HOG settings as training
    (`colorspace`, `orient`, `pix_per_cell`, `cell_per_block`,
    `hog_channel`). Positive windows are accumulated into a heatmap,
    thresholded at 3, and the surviving regions are boxed.

    Parameters
    ----------
    image : array
        Color frame indexed as image[row, column, channel].

    Returns
    -------
    array
        The same `image` object, with the boxes drawn onto it.
    """
    # only search the bottom half of the image
    y_start_stop = [image.shape[0] * 0.5, image.shape[0]] # Min and max in y to search in slide_window()

    # first get the windows we want to feature extract and classify from
    windows = slide_window(image, x_start_stop=[None, None], y_start_stop=y_start_stop,
                    xy_window=(96, 96), xy_overlap=(0.5, 0.5))

    # determine which windows have a car in them
    # (color_space is passed explicitly so that detection always uses the
    # color space the training features were extracted in)
    hot_windows = search_windows(image, windows, classifier, scaler,
                        color_space=colorspace,
                        orient=orient, pix_per_cell=pix_per_cell,
                        cell_per_block=cell_per_block,
                        hog_channel=hog_channel)

    # build a heatmap and threshold it
    # (built-in float replaces the removed np.float alias)
    heatmap = np.zeros_like(image[:,:,0]).astype(float)
    heatmap = heat_threshold(heatmap, hot_windows, 3)

    # draw bounding boxes around the heatmap locations
    final = draw_labeled_bboxes(image, heatmap)

    return final

Then finally add the functionality to grab single frames from a video:

In [28]:
# Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML

# Path the processed video is written to
# NOTE(review): presumably the test_videos_output directory must already exist - confirm
output = 'test_videos_output/result.mp4'
## Only a short subclip (seconds 5 to 10) of the project video is processed to speed up testing.
## Change the arguments of .subclip(start_second, end_second), or drop the call,
## to process a different range or the whole video.

clip1 = VideoFileClip("project_video.mp4").subclip(5,10)
# Run tracking_pipeline() on every frame of the subclip
clip = clip1.fl_image(tracking_pipeline) # NOTE: this function expects color images!!
# Write the result without an audio track and report the wall time
%time clip.write_videofile(output, audio=False)

[MoviePy] >>>> Building video test_videos_output/result.mp4
[MoviePy] Writing video test_videos_output/result.mp4


 99%|███████████████████████████████████████████████████████████████████████████████▎| 125/126 [02:00<00:00,  1.03it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: test_videos_output/result.mp4 

Wall time: 2min 1s
