# **Vehicle Detection**
---
*Pipeline to detect vehicles in a video stream using machine learning.*

**Imports**
 - *Import all the necessary libraries*

In [4]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
import time
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.externals import joblib
from scipy.ndimage.measurements import label
from moviepy.editor import VideoFileClip
from IPython.display import HTML

%matplotlib inline

 - Convert from one color space to another easily

In [5]:
def convert_space(img, space, bgr_input=True):
    """
        Convert color space of image.

        Parameters
        ----------
        img : image array; passed to ``cv2.cvtColor`` (or copied, see Returns).
        space : target color space, one of 'rgb', 'hsv', 'hls', 'luv', 'ycb'
            ('ycb' selects YCrCb).
        bgr_input : truthy when ``img`` is in BGR channel order, falsy when it
            is already RGB.

        Returns
        -------
        The converted image. For an RGB input with ``space == 'rgb'`` no
        conversion is needed, so a copy of ``img`` is returned instead.

        Raises
        ------
        AssertionError
            If ``space`` is not one of the supported names.
    """
    # Suffix of the cv2.COLOR_<SRC>2<DST> constant for every supported target.
    targets = {'rgb': 'RGB', 'hsv': 'HSV', 'hls': 'HLS', 'luv': 'LUV', 'ycb': 'YCrCb'}
    if space not in targets:
        # Raised explicitly instead of `assert False` so the validation is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Use one of 'rgb','hsv','hls','luv', 'ycb'(YCrCb)")

    if not bgr_input and space == 'rgb':
        # Already in the requested space: hand back an independent copy.
        return np.copy(img)

    source = 'BGR' if bgr_input else 'RGB'
    conversion = getattr(cv2, 'COLOR_%s2%s' % (source, targets[space]))
    return cv2.cvtColor(img, conversion)

**Helper Functions**

---
   - Get HOG features of an image

In [6]:
def get_hog_features(img, hogD):
    """Return whatever `hogD.compute` produces for `img` (the HOG descriptor)."""
    descriptor = hogD.compute(img)
    return descriptor

   - Spatial Binning of color

In [7]:
def bin_spatial(image, size=(32,32)):
    """Resize `image` to `size` with cv2.resize and return the pixels flattened to 1-D."""
    resized = cv2.resize(image, size)
    return resized.ravel()

 - Color Histogram

In [8]:
def color_hist(img, nbins=32, bins_range=(0, 256)):
    """
        Histogram each of the first three channels of `img` separately and
        return the bin counts joined into one 1-D vector of length 3 * nbins.
    """
    per_channel_counts = [
        # np.histogram returns (counts, bin_edges); only the counts are kept.
        np.histogram(img[:, :, channel], bins=nbins, range=bins_range)[0]
        for channel in range(3)
    ]
    return np.concatenate(per_channel_counts)

 - Extract Features from a single image

In [9]:
def extract(img, hogD=None, space='rgb', spatial_size=(32,32), hist_bins = 32, hog_ch='r', hog_all_ch=True,
            spatial=True, hog=True, hist=True, bgr_input=True):
    """
        Extract Features of a Single image.

        The image is first converted with `convert_space(img, space, bgr_input)`;
        the enabled feature groups are then concatenated in this order:
        spatial binning, color histogram, HOG.

        Parameters
        ----------
        img : input image (BGR when `bgr_input` is truthy, otherwise RGB).
        hogD : object whose `compute` method yields HOG features; required
            whenever `hog` is truthy.
        space : color space name understood by `convert_space`.
        spatial_size : target size for `bin_spatial`.
        hist_bins : number of bins per channel for `color_hist`.
        hog_ch : single-channel HOG selector, looked up by position inside
            `space` (e.g. 'r' in 'rgb'; for 'ycb', 'c' = Cr and 'b' = Cb).
            Only used when `hog_all_ch` is falsy.
        hog_all_ch : compute HOG on all three channels when truthy.
        spatial, hog, hist : switches for the three feature groups.
        bgr_input : forwarded to `convert_space`.

        Returns
        -------
        1-D array holding the concatenated features.

        Raises
        ------
        AssertionError
            If `hog_ch` does not occur in `space` (single-channel HOG only).
    """
    features = []

    # Convert to desired color space
    image = convert_space(img, space, bgr_input)

    if spatial:
        features.append(bin_spatial(image, spatial_size))
    if hist:
        features.append(color_hist(image, nbins=hist_bins))
    if hog:
        if hog_all_ch:
            channels = range(3)
        else:
            ch = space.find(hog_ch)
            if ch == -1:
                # Explicit raise (not `assert`) so the check survives `python -O`.
                raise AssertionError(
                    "Hog channel should be in color space eg r is in rgb, For YCrCb, ycb, c = Cr, b=Cb")
            channels = [ch]
        # The list is always created here; previously the single-channel path
        # appended to a list that had never been defined and raised NameError.
        hog_features = [get_hog_features(image[:,:,c], hogD) for c in channels]
        features.append(np.ravel(hog_features))
    return np.concatenate(features)

 - Extract Features from a list of files:
 - Used to train the data

In [10]:
def extract_from_files(files, **params):
    """
        Read every path in `files` with cv2.imread and return the list of
        feature vectors produced by `extract(image, **params)`, in input order.
    """
    return [extract(cv2.imread(path), **params) for path in files]

 - Function to draw boxes on image

In [11]:
def draw_boxes(img, bboxes, color=(0, 255, 0), thick=6):
    """
        Return a copy of `img` with one rectangle drawn per entry of `bboxes`.
        Each entry supplies its two corner points at index 0 and index 1;
        the input image itself is left untouched.
    """
    canvas = np.copy(img)
    for corners in bboxes:
        first_corner, second_corner = corners[0], corners[1]
        cv2.rectangle(canvas, first_corner, second_corner, color, thick)
    return canvas

**Define parameters to extract Features**

**Training**

In [12]:
def getHogD():
    """
        Build the OpenCV HOG descriptor used for HOG feature extraction.
        All HOG settings (window, block and cell sizes, orientations, ...)
        are fixed here and passed positionally to cv2.HOGDescriptor.
    """
    descriptor_args = (
        (64, 64),                  # winSize
        (16, 16),                  # blockSize
        (8, 8),                    # blockStride
        (8, 8),                    # cellSize
        9,                         # nbins
        1,                         # derivAperture
        4.,                        # winSigma
        0,                         # histogramNormType
        2.0000000000000001e-01,    # L2HysThreshold
        0,                         # gammaCorrection
        64,                        # nlevels
    )
    return cv2.HOGDescriptor(*descriptor_args)

 - Define Training Parameters

In [13]:
# Build the HOG descriptor once and bundle every feature-extraction setting
# into a single keyword dictionary ('ycb' is this notebook's name for YCrCb).
hogD = getHogD()
params = dict(
    space='ycb',
    spatial_size=(32,32),
    hist_bins=64,
    hog_ch='c',
    hog_all_ch=True,
    spatial=True,
    hist=True,
    hog=True,
    hogD=hogD,
    bgr_input=True,
)

 - Function to Read the training data and split into train and test sets

In [14]:
def getTrainTestData(**params):
    """
        Build scaled train / test feature sets from the image folders.

        1. Reads the training images.
        2. Extracts Feature vectors (`params` is forwarded to `extract_from_files`).
        3. Split the data into training and test sets (80% / 20%).
        4. Compute a StandardScaler on the training split only, so that no
           statistics of the held-out test split leak into preprocessing.
        5. Save the scaler to 'scaler.p' for future use.
        6. Scale both splits with that scaler.

        Returns
        -------
        X_train, X_test, y_train, y_test, X_scaler
            Labels are 1 for vehicles and 0 for non-vehicles.

        Raises
        ------
        ValueError
            If either image folder yields no '.png' files.
    """
    t1 = time.time()
    # Read images
    car_files = glob.glob('./data/vehicles/*/*.png')
    not_car_files = glob.glob('./data/non-vehicles/*/*.png')
    if not car_files or not not_car_files:
        # Fail early with a readable message instead of an obscure shape error
        # from the stacking / scaling steps further down.
        raise ValueError(
            "No training images found: expected PNG files under "
            "'./data/vehicles/*/' and './data/non-vehicles/*/'")

    # Extract Features
    f_cars = extract_from_files(car_files, **params)
    f_not_cars = extract_from_files(not_car_files, **params)

    # Stack both classes into one feature matrix with matching labels
    X = np.vstack((f_cars, f_not_cars)).astype(np.float32)
    y = np.hstack((np.ones(len(f_cars)), np.zeros(len(f_not_cars))))

    # Shuffle and split the data (a fresh random split on every call)
    random_state = np.random.randint(0, 100)
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state)

    # Fit the scaler on the training split only, then apply it to both splits
    X_scaler = StandardScaler().fit(X_train_raw)

    # Save the scaler for future use
    joblib.dump(X_scaler, 'scaler.p')

    X_train = X_scaler.transform(X_train_raw)
    X_test = X_scaler.transform(X_test_raw)
    print ("Extracting Features took : %.2f Seconds"  % (time.time() - t1))
    return X_train, X_test, y_train, y_test, X_scaler

 - Get the scaled training data and scaler

In [89]:
# Extract features for the whole dataset and split them; getTrainTestData also
# writes the fitted scaler to 'scaler.p' as a side effect.
X_train, X_test, y_train, y_test, X_scaler = getTrainTestData(**params)

Extracting Features took : 33.98 Seconds


In [90]:
# Sanity check: length of a single feature vector (spatial + histogram + HOG parts).
X_train[0].shape

(8556,)

In [91]:
# Train a linear Support Vector Machine on the scaled features and time
# the fit together with the accuracy evaluation.
t1 = time.time()
svc = LinearSVC().fit(X_train, y_train)

# Accuracy on the held-out split, as a percentage
score = 100 * svc.score(X_test, y_test)
t2 = time.time()

print ("Accuracy : %.2f%%" % score)

# Persist the classifier so later cells can reload it without retraining
joblib.dump(svc, 'clf.p')
"Training took : %.3f seconds" % (t2-t1)

Accuracy : 99.30%


'Training took : 6.345 seconds'

In [15]:
# Reload the classifier and scaler written by the training cells above so the
# video pipeline can run without retraining.
# NOTE(review): joblib.load unpickles arbitrary objects; only load files this
# notebook produced itself, never files from an untrusted source.
svc = joblib.load('./clf.p')
X_scaler = joblib.load('./scaler.p')

In [16]:
def draw_labeled_bboxes(img, labels):
    """
        Draw one bounding box per labeled region directly onto `img`.

        Parameters
        ----------
        img : image to draw on; it is modified in place.
        labels : pair `(label_map, count)`; region k (1 <= k <= count) consists
            of the pixels where `label_map == k`.

        Returns
        -------
        The same `img` object with the rectangles drawn.
    """
    label_map, car_count = labels[0], labels[1]
    # Iterate through all detected cars
    for car_number in range(1, car_count + 1):
        # Row (y) and column (x) indices of the pixels carrying this label
        nonzeroy, nonzerox = (label_map == car_number).nonzero()
        # Bounding box from the min/max x and y. The NumPy integer scalars are
        # cast to plain ints because some OpenCV builds refuse NumPy scalar
        # types as point coordinates.
        top_left = (int(np.min(nonzerox)), int(np.min(nonzeroy)))
        bottom_right = (int(np.max(nonzerox)), int(np.max(nonzeroy)))
        # Draw the box on the image
        cv2.rectangle(img, top_left, bottom_right, (0,255,0), 6)
    # Return the image
    return img

In [17]:
def slide_window(img, x_start_stop=[None, None], y_start_stop=[None, None],
                    xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """
        List the sliding-window boxes covering a region of `img`.

        Parameters
        ----------
        img : image array; only `img.shape[0]` (height) and `img.shape[1]`
            (width) are read.
        x_start_stop, y_start_stop : `[start, stop]` pixel bounds of the search
            region. A `None` entry means the image edge (0 or the full
            width / height). The sequences are only read, never modified.
        xy_window : `(width, height)` of each window in pixels.
        xy_overlap : `(x, y)` fraction by which neighbouring windows overlap.

        Returns
        -------
        List of `((startx, starty), (endx, endy))` tuples, ordered row by row.
    """
    # Resolve the bounds into locals. Writing the resolved values back into the
    # argument lists would modify the shared default lists, making the bounds
    # of the first call stick for every later call.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Length of x and y sides of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Step between consecutive windows (built-in int: np.int no longer exists
    # in current NumPy releases)
    nx_pix_per_step = int(xy_window[0]*(1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1]*(1 - xy_overlap[1]))

    # Part of a window shared with its neighbour
    nx_buffer = int(xy_window[0]*(xy_overlap[0]))
    ny_buffer = int(xy_window[1]*(xy_overlap[1]))

    # Calculate number of windows in each direction
    nx_windows = int((xspan-nx_buffer)/nx_pix_per_step)
    ny_windows = int((yspan-ny_buffer)/ny_pix_per_step)

    window_list = []
    for ys in range(ny_windows):
        starty = ys*ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs*nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))

    return window_list

In [18]:
class HeatMaps():
    """
        Class to hold previous heatmaps.

        Keeps at most `buffer_size` heatmaps; adding one beyond that limit
        discards the oldest.
    """
    def __init__(self, buffer):
        """Create an empty history that retains up to `buffer` heatmaps."""
        self.maps = []                     # stored heatmaps, oldest first
        self.num_maps = len(self.maps)     # number of heatmaps currently stored
        self.buffer_size = buffer          # maximum number of heatmaps kept

    def addHeatMap(self, heatmap):
        """Add a new heatmap, evicting the oldest one if the buffer is full."""
        self.maps.append(heatmap)
        if len(self.maps) > self.buffer_size:
            self.maps.pop(0)
        # Count after the eviction so num_maps always equals len(self.maps);
        # counting before it left num_maps one too high once the buffer filled.
        self.num_maps = len(self.maps)

    def getHeatMap(self):
        """Get the element-wise sum of the stored heatmaps, or None if empty."""
        if self.num_maps > 0:
            return np.sum(self.maps, axis=0)
        else:
            return None
            
            

In [19]:
# History of per-frame heatmaps shared across calls to process()
HeatMap = HeatMaps(12)
def process(image):
    """
        Finds cars in individual frames of the image by applying a sliding window search.
        Input image is expected in RGB space. After classifying individual frames, Heat maps are
        generated and saved. Thresholding is applied to remove false positives.

        Reads the module-level `video_params`, `X_scaler`, `svc` and `HeatMap`
        at call time; the frame's heatmap is appended to `HeatMap`.

        Parameters
        ----------
        image : video frame; boxes are drawn onto it in place.

        Returns
        -------
        The same `image` with the detected bounding boxes drawn.
    """
    # Get the windows to search in
    windows = []
    windows += slide_window(image, y_start_stop=[528,656], xy_window=(128,128), xy_overlap=(0.5, 0.5))
    windows += slide_window(image, y_start_stop=[400,656], xy_window=(96,96), xy_overlap=(0.7, 0.7))
    windows += slide_window(image, y_start_stop=[400,464], xy_window=(64,64), xy_overlap=(0.5, 0.5))
    # Get a blank heatmap
    heatmap = np.zeros_like(image[:,:,0])

    # Hold the windows with positive matches
    hits = []
    if windows:
        # One feature row per window, resized to the 64x64 training size
        feature_rows = []
        for window in windows:
            cutout = image[window[0][1]:window[1][1], window[0][0]:window[1][0]]
            cutout = cv2.resize(cutout, (64,64))
            feature_rows.append(extract(cutout, **video_params))
        # Scale and classify all windows in one 2-D batch: the estimators
        # expect a (n_samples, n_features) matrix rather than a 1-D vector,
        # and a single call avoids per-window overhead.
        predictions = svc.predict(X_scaler.transform(np.vstack(feature_rows)))
        hits = [window for window, predicted in zip(windows, predictions) if predicted == 1]

    # add heat to heatmap in positive windows
    for box in hits:
        heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1

    # Save heatmap for future frames
    HeatMap.addHeatMap(heatmap)

    if HeatMap.num_maps > 2:
        # Sum the heatmaps from past frames to remove jitter, then threshold
        heatmap = HeatMap.getHeatMap()
        heatmap[heatmap < 9] = 0
    else:
        # Use a smaller threshold if just getting started. Work on a copy so
        # the unthresholded map kept in the history is not altered, and do not
        # apply the larger threshold on top (it would override this one).
        heatmap = heatmap.copy()
        heatmap[heatmap < 3] = 0

    # Get the bounding boxes
    labels = label(heatmap)

    # Draw the bounding boxes on original Frame
    return draw_labeled_bboxes(image, labels)

In [20]:
# Define parameters for frames from video as video frames are RGB
# Video frames arrive as RGB, so reuse the training parameters with only the
# BGR flag switched off (a shallow copy; `params` itself stays unchanged).
video_params = dict(params, bgr_input=False)

In [21]:
# Start from an empty heatmap history so frames from an earlier run cannot
# influence this video.
HeatMap = HeatMaps(12) 
output = './out3.mp4'
clip1 = VideoFileClip("./project_video.mp4")
clip1 = clip1.subclip(0)
# Apply process() to every frame of the clip
white_clip = clip1.fl_image(process) #NOTE: this function expects color images!!
# Write the annotated video without audio; %time reports how long it took
%time white_clip.write_videofile(output, audio=False, verbose=False)

100%|█████████▉| 1260/1261 [09:38<00:00,  1.88it/s]


CPU times: user 9min 30s, sys: 7.01 s, total: 9min 37s
Wall time: 9min 39s


In [22]:
# NOTE(review): HTML is already imported in the imports cell at the top of the
# notebook; this repeated import is redundant.
from IPython.display import HTML

# Embed the rendered video in the notebook
output = './out3.mp4'
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(output))