In [1]:
import os
from glob import glob
from random import random
import pickle
import time

import numpy as np
import pandas as pn
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from mpl_toolkits.mplot3d import Axes3D
from skimage.feature import hog
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from scipy.ndimage.measurements import label

from moviepy.editor import VideoFileClip
from IPython.display import HTML

%matplotlib inline

In [2]:
# Project parameters: image folders, the serialized model file, and the
# input videos processed at the end of the notebook.
TEST_IMGS_PATH = "./test_images"
OUTPUT_IMGS_PATH = "./output_images"
MODEL_PATH = "model.pkl"
TEST_VIDEO_PATH = "./test_video.mp4"
PROJECT_VIDEO_PATH = "./project_video.mp4"

In [None]:
# Load images: every file matching each pattern is read into memory.
# Without recursive=True, `**` in a glob pattern matches a single directory
# level, i.e. <category>/<sub-folder>/<file>.png.
test_imgs = [
    mpimg.imread(path)
    for path in glob(os.path.join(TEST_IMGS_PATH, "*.jpg"))
]

non_vehicle_imgs = [
    mpimg.imread(path)
    for path in glob(os.path.join(TEST_IMGS_PATH, "non-vehicles/**/*.png"))
]

vehicle_imgs = [
    mpimg.imread(path)
    for path in glob(os.path.join(TEST_IMGS_PATH, "vehicles/**/*.png"))
]

# Data Statistics

In [None]:
# Report how many images were loaded for each category.
image_counts = (
    ("total number of test images: {}", test_imgs),
    ("total number of non-vehicle images: {}", non_vehicle_imgs),
    ("total number of vehicle images: {}", vehicle_imgs),
)
for template, images in image_counts:
    print(template.format(len(images)))

# Feature Extraction
## Color Space

In [3]:
def plot3d(pixels, colors_rgb,
        axis_labels=list("RGB"), axis_limits=[(0, 255), (0, 255), (0, 255)]):
    """Scatter-plot the pixels of an image in a 3D color space.

    Args:
        pixels: array indexed as ``pixels[:, :, k]`` for k in 0..2; each
            channel supplies one coordinate of every point.
        colors_rgb: array reshaped to ``(-1, 3)`` and used as the per-point
            colors, so it must hold one RGB triple per pixel of `pixels`.
        axis_labels: three labels for the x, y and z axes.
        axis_limits: three ``(low, high)`` pairs for the x, y and z axes.

    Returns:
        The 3D axes object, for further manipulation by the caller.
    """

    # Create figure and 3D axes. The axes are requested from the figure so
    # that they are always attached to it; constructing Axes3D(fig) directly
    # does not add the axes to the figure on current Matplotlib releases.
    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot(111, projection='3d')

    # Set axis limits
    ax.set_xlim(*axis_limits[0])
    ax.set_ylim(*axis_limits[1])
    ax.set_zlim(*axis_limits[2])

    # Set axis labels and sizes
    ax.tick_params(axis='both', which='major', labelsize=14, pad=8)
    ax.set_xlabel(axis_labels[0], fontsize=16, labelpad=16)
    ax.set_ylabel(axis_labels[1], fontsize=16, labelpad=16)
    ax.set_zlabel(axis_labels[2], fontsize=16, labelpad=16)

    # Plot pixel values with colors given in colors_rgb
    ax.scatter(
        pixels[:, :, 0].ravel(),
        pixels[:, :, 1].ravel(),
        pixels[:, :, 2].ravel(),
        c=colors_rgb.reshape((-1, 3)), edgecolors='none')

    return ax  # return the 3D axes object for further manipulation

def convert_color(img, color_space):
    """Convert an RGB image to `color_space`.

    `color_space` is the suffix of an OpenCV ``COLOR_RGB2<suffix>`` constant
    (for example "HSV" or "YCrCb"). For "RGB" no conversion is done and the
    input object itself is returned, not a copy.
    """
    if color_space == "RGB":
        return img
    conversion_code = getattr(cv2, "COLOR_RGB2{}".format(color_space))
    return cv2.cvtColor(img, conversion_code)

def show_color_space(img, color_space="RGB"):
    """Plot the distribution of an image's pixel values in `color_space`.

    The image is first shrunk so that neither side exceeds 64 pixels, then
    converted with `convert_color` and drawn with `plot3d`. Point colors are
    the shrunk RGB image divided by 255.
    NOTE(review): the division by 255 presumes `img` holds 0-255 RGB values;
    confirm for float inputs.

    Args:
        img: RGB image array of shape (rows, cols, 3).
        color_space: suffix understood by `convert_color` ("RGB", "HSV", ...).
    """

    # Select a small fraction of pixels to plot by subsampling it
    shrink = max(img.shape[0], img.shape[1], 64) / 64  # at most 64 rows and columns
    # cv2.resize expects (width, height); builtin int replaces the np.int
    # alias, which recent NumPy releases no longer provide.
    small_size = (int(img.shape[1] / shrink), int(img.shape[0] / shrink))
    img_small = cv2.resize(img, small_size, interpolation=cv2.INTER_NEAREST)
    img_small_rgb = img_small / 255.
    img_small = convert_color(img_small, color_space)

    # 'YCrCb' cannot be split letter by letter into three axis names.
    if color_space != 'YCrCb':
        labels = list(color_space)
    else:
        labels = ['Y','Cr','Cb']
    plot3d(img_small, img_small_rgb, axis_labels=labels)
    plt.show()

In [None]:
# Read one example per class. cv2.imread yields BGR channel order, so each
# image is converted to RGB before use.
vehicle_img_path = os.path.join(TEST_IMGS_PATH, 'vehicles/KITTI_extracted/444.png')
non_vehicle_img_path = os.path.join(TEST_IMGS_PATH, 'non-vehicles/Extras/extra40.png')

vehicle_img = cv2.cvtColor(cv2.imread(vehicle_img_path), cv2.COLOR_BGR2RGB)
non_vehicle_img = cv2.cvtColor(cv2.imread(non_vehicle_img_path), cv2.COLOR_BGR2RGB)

# Show both examples side by side.
f, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(vehicle_img)
ax2.imshow(non_vehicle_img)
ax1.set_title('Example Car Image')
ax2.set_title('Example Non Car Image')

In [None]:
# RGB distribution of the car example, then of the non-car example.
for example_img in (vehicle_img, non_vehicle_img):
    show_color_space(example_img)

In [None]:
# HSV distribution of the car example, then of the non-car example.
for example_img in (vehicle_img, non_vehicle_img):
    show_color_space(example_img, "HSV")

In [None]:
# YUV distribution of the car example, then of the non-car example.
for example_img in (vehicle_img, non_vehicle_img):
    show_color_space(example_img, "YUV")

In [None]:
# YCrCb distribution of the car example, then of the non-car example.
for example_img in (vehicle_img, non_vehicle_img):
    show_color_space(example_img, "YCrCb")

In [4]:
def color_hist(img, color_space='RGB', nbins=32, bins_range=(0, 256)):
    """Extract a concatenated per-channel color histogram.

    The image is converted to `color_space`, its values are mapped onto
    `bins_range`, and a histogram with `nbins` bins is computed for each of
    the first three channels.

    Scaling depends on the dtype of the converted image:
      * integer images are treated as 0-255 data and divided by 255 first;
      * floating-point images are assumed to already lie in [0, 1] and are
        not divided again. Dividing such data by 255 would squeeze every
        value into the first bin and make the feature constant.

    Args:
        img: RGB image array of shape (rows, cols, >=3).
        color_space: suffix understood by `convert_color`.
        nbins: number of histogram bins per channel.
        bins_range: ``(low, high)`` value range covered by the bins.

    Returns:
        1-D array of length ``3 * nbins`` holding the three histograms
        back to back (channel 0, then 1, then 2).
    """
    converted = convert_color(img, color_space)
    low, high = bins_range

    if np.issubdtype(converted.dtype, np.integer):
        unit_scaled = converted / 255
    else:
        unit_scaled = converted
    # Map the unit range onto [low, high]; the offset is a no-op for the
    # usual low == 0.
    scaled = unit_scaled * (high - low) + low

    # Histogram each channel separately and join the bin counts into a
    # single feature vector (np.histogram returns (counts, edges)).
    hist_features = np.concatenate([
        np.histogram(scaled[:, :, channel], bins=nbins, range=bins_range)[0]
        for channel in range(3)
    ])
    return hist_features

In [None]:
# Compare the YUV color histograms of the car and non-car examples.
hist_options = dict(color_space="YUV", bins_range=(0, 1))
vehicle_feat = color_hist(vehicle_img, **hist_options)
non_vehicle_feat = color_hist(non_vehicle_img, **hist_options)

plt.figure()
for feature_vector, series_name in ((vehicle_feat, 'vehicle'),
                                    (non_vehicle_feat, 'non-vehicle')):
    plt.plot(feature_vector, label=series_name)
plt.legend(loc='upper left')

In [5]:
def bin_spatial(img, color_space='RGB', size=(32, 32)):
    """Convert an image to a one-dimensional spatial feature vector.

    The image is converted to `color_space` with `convert_color`, resized to
    `size` and flattened.

    Args:
        img: RGB image array.
        color_space: suffix understood by `convert_color`.
        size: target size passed to ``cv2.resize``.

    Returns:
        1-D array containing every value of the resized image.
    """
    # Same conversion helper as the other feature extractors. No explicit
    # copy is needed for the RGB case because the resized result is what
    # gets flattened, not the input.
    feature_image = convert_color(img, color_space)
    return cv2.resize(feature_image, size).ravel()

In [None]:
# Spatially binned YUV feature vector of the car example versus the
# non-car example.
spatial_options = dict(color_space="YUV", size=(32, 32))
vehicle_feat = bin_spatial(vehicle_img, **spatial_options)
non_vehicle_feat = bin_spatial(non_vehicle_img, **spatial_options)

plt.figure()
for feature_vector, series_name in ((vehicle_feat, 'vehicle'),
                                    (non_vehicle_feat, 'non-vehicle')):
    plt.plot(feature_vector, label=series_name)
plt.legend(loc='upper left')

## Histogram of Oriented Gradients

In [6]:
# Define a function to return HOG features and visualization
def get_hog_features(img, orient=9, pix_per_cell=8, cell_per_block=2, vis=False, feature_vec=True):
    """Extract HOG features and, optionally, a HOG visualization.

    Args:
        img: single-channel image.
        orient: number of orientation bins.
        pix_per_cell: cell side length in pixels (cells are square).
        cell_per_block: block side length in cells (blocks are square).
        vis: when true, also return the HOG visualization image.
        feature_vec: when true the features are returned flattened,
            otherwise in block layout. Honoured whether or not `vis` is set.

    Returns:
        ``features`` when `vis` is false, ``(features, hog_image)`` otherwise.
    """
    # Arguments shared by both call variants.
    # NOTE(review): `visualise` is the keyword spelling this notebook was
    # written against; newer scikit-image releases spell it `visualize` -
    # confirm against the installed version.
    hog_options = dict(orientations=orient,
                       pixels_per_cell=(pix_per_cell, pix_per_cell),
                       cells_per_block=(cell_per_block, cell_per_block),
                       transform_sqrt=False,
                       feature_vector=feature_vec)

    if vis:
        features, hog_image = hog(img, visualise=True, **hog_options)
        return features, hog_image

    return hog(img, visualise=False, **hog_options)

In [None]:
# HOG of the grayscale car example, shown next to the grayscale image.
gray = cv2.cvtColor(vehicle_img, cv2.COLOR_RGB2GRAY)
features, hog_image = get_hog_features(gray, vis=True, feature_vec=False)

f, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(gray, cmap='gray')
ax2.imshow(hog_image, cmap='gray')
ax1.set_title('Example Car Image')
ax2.set_title('HOG Visualization')

In [None]:
# One figure per YCrCb channel of the car example: the raw channel on the
# left, its HOG visualization on the right.
converted_img = convert_color(vehicle_img, 'YCrCb')

channel_count = converted_img.shape[2]
for c in range(channel_count):
    ch = converted_img[..., c]
    features, hog_image = get_hog_features(ch, vis=True, feature_vec=False)
    f, (ax1, ax2) = plt.subplots(1, 2)
    for axis, picture, caption in ((ax1, ch, 'Example Car Image'),
                                   (ax2, hog_image, 'HOG Visualization')):
        axis.imshow(picture, cmap='gray')
        axis.set_title(caption)

In [None]:
# One figure per YCrCb channel of the non-car example: the raw channel on
# the left, its HOG visualization on the right.
converted_img = convert_color(non_vehicle_img, 'YCrCb')

channel_count = converted_img.shape[2]
for c in range(channel_count):
    ch = converted_img[..., c]
    features, hog_image = get_hog_features(ch, vis=True, feature_vec=False)
    f, (ax1, ax2) = plt.subplots(1, 2)
    for axis, picture, caption in ((ax1, ch, 'Example Non Car Image'),
                                   (ax2, hog_image, 'HOG Visualization')):
        axis.imshow(picture, cmap='gray')
        axis.set_title(caption)

# Vehicle Image Classifier
## Combine and Normalize Features

In [None]:
def extract_features(imgs, flip_rate=0.0, cs_list=['YUV', 'YUV', 'YCrCb'], 
                     nbins=32, bins_range=(0, 256),
                     spatial_size=(32, 32),
                     hog_channels=[0,1,2], orient=9, pix_per_cell=8, cell_per_block=2,
                     hist_feat=True, spatial_feat=True, hog_feat=True):
    """Extract the selected features from every image.

    For each image the enabled feature groups are concatenated in the order
    color histogram, spatial binning, HOG.

    Args:
        imgs: iterable of RGB image arrays.
        flip_rate: probability (0..1) with which an image is additionally
            flipped and the flipped copy's features appended after all
            original features.
        cs_list: color spaces for, in order, the histogram, the spatial
            binning and the HOG features.
        nbins, bins_range: forwarded to `color_hist`.
        spatial_size: forwarded to `bin_spatial`.
        hog_channels: channel indices of the HOG color space to use.
        orient, pix_per_cell, cell_per_block: forwarded to `get_hog_features`.
        hist_feat, spatial_feat, hog_feat: switches for the feature groups.

    Returns:
        List with one 1-D feature array per input image, followed by one per
        augmented (flipped) image.
    """
    features = []
    augment_imgs = []
    
    for img in imgs:
        hist_features = []
        spatial_features = []
        hog_features = []
        
        if hist_feat:
            hist_features = color_hist(img, color_space=cs_list[0], nbins=nbins, bins_range=bins_range)
        
        if spatial_feat:
            spatial_features = bin_spatial(img, color_space=cs_list[1], size=spatial_size)
        
        if hog_feat:
            converted_img = convert_color(img, cs_list[2])
            # Channel-by-channel HOG, flattened into one vector.
            hog_features = np.ravel([
                get_hog_features(converted_img[:,:,c], orient,
                                 pix_per_cell, cell_per_block,
                                 feature_vec=True)
                for c in hog_channels
            ])
        
        features.append(np.hstack((hist_features, spatial_features, hog_features)))
    
        # Augment the data by flipping a random share of the images.
        # NOTE(review): flip code 0 mirrors top-to-bottom; a left-right
        # mirror (code 1) may have been intended - confirm.
        if random() < flip_rate:
            augment_imgs.append(cv2.flip(img, 0))
    
    if augment_imgs:
        # Features of the augmented images must be built with exactly the
        # same settings (including hog_channels) so that every row has the
        # same length. flip_rate is left at its default of 0.0 here, so the
        # recursion stops after one level.
        features.extend(extract_features(augment_imgs, cs_list=cs_list,
                                         nbins=nbins, bins_range=bins_range,
                                         spatial_size=spatial_size,
                                         hog_channels=hog_channels,
                                         orient=orient,
                                         pix_per_cell=pix_per_cell,
                                         cell_per_block=cell_per_block,
                                         hist_feat=hist_feat,
                                         spatial_feat=spatial_feat,
                                         hog_feat=hog_feat))
        
    return features

In [None]:
# Feature-extraction settings used for both classes.
cs_list=['YUV', 'YUV', 'YCrCb']
nbins=32
bins_range=(0,1)
spatial_size=(32,32)
hog_channels=[0,1,2]
orient=9
pix_per_cell=8
cell_per_block=2
hist_feat=True
spatial_feat=True
hog_feat=True

# Collected once so both extraction calls use identical settings; cs_list is
# passed explicitly rather than relying on the function default.
feature_kwargs = dict(cs_list=cs_list,
                      nbins=nbins, bins_range=bins_range,
                      spatial_size=spatial_size,
                      hog_channels=hog_channels,
                      orient=orient,
                      pix_per_cell=pix_per_cell,
                      cell_per_block=cell_per_block,
                      hist_feat=hist_feat,
                      spatial_feat=spatial_feat,
                      hog_feat=hog_feat)

car_features = extract_features(vehicle_imgs, **feature_kwargs)
non_car_features = extract_features(non_vehicle_imgs, **feature_kwargs)

print("Total number of vehicle features: {}".format(len(car_features)))
print("Total number of non vehicle features: {}".format(len(non_car_features)))


def _plot_feature_example(image, row):
    """Show `image` beside row `row` of X (raw) and scaled_X (normalized)."""
    fig = plt.figure(figsize=(12,4))
    plt.subplot(131)
    plt.imshow(image)
    plt.title('Original Image')
    plt.subplot(132)
    plt.plot(X[row])
    plt.title('Raw Features')
    plt.subplot(133)
    plt.plot(scaled_X[row])
    plt.title('Normalized Features')
    fig.tight_layout()


if len(car_features) > 0:
    # Create an array stack of feature vectors: car rows first, then non-car
    X = np.vstack((car_features, non_car_features)).astype(np.float64)                        
    # Fit a per-column scaler
    X_scaler = StandardScaler().fit(X)
    # Apply the scaler to X
    scaled_X = X_scaler.transform(X)

    # Plot a random car example with its raw and scaled features
    car_ind = np.random.randint(0, len(vehicle_imgs))
    _plot_feature_example(vehicle_imgs[car_ind], car_ind)

    # Plot a random non-car example. Non-car rows start right after the car
    # rows, so the offset into X is the number of car feature rows.
    non_car_ind = np.random.randint(0, len(non_vehicle_imgs))
    _plot_feature_example(non_vehicle_imgs[non_car_ind],
                          len(car_features) + non_car_ind)
else: 
    print('Your function only returns empty feature vectors...')

In [None]:
# Length of a single feature vector (all rows share it).
features_per_image = len(car_features[0])
print("Total of {} features".format(features_per_image))

## Data Set Generation

In [None]:
# Define the labels vector: 1 for car rows, 0 for non-car rows, in the same
# order in which the feature rows were stacked into X.
y = np.hstack((np.ones(len(car_features)), np.zeros(len(non_car_features))))

# A fixed seed makes the splits (and therefore the reported accuracies)
# reproducible; stratifying keeps the car / non-car ratio equal in every part.
# NOTE(review): X_scaler was fitted on all of X before this split, so the
# held-out rows contributed to the scaling statistics.
SPLIT_SEED = 42

# 10% held out as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SPLIT_SEED, stratify=y)

# ... and 20% of the remainder held out as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SPLIT_SEED, stratify=y_train)

## Training

In [None]:
# Grid-search the regularization strength C of a linear SVM.
parameters = {'C':[0.1, 1, 10]}
svr = LinearSVC()

# Scale each split once; the training matrix is needed both for fitting and
# for the training-accuracy report.
X_train_scaled = X_scaler.transform(X_train)
X_val_scaled = X_scaler.transform(X_val)

# Check the training time for the SVC
t = time.time()
clf = GridSearchCV(svr, parameters)
clf.fit(X_train_scaled, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
print('Training Accuracy = ', round(clf.score(X_train_scaled, y_train), 4))
print('Validation Accuracy = ', round(clf.score(X_val_scaled, y_val), 4))

In [None]:
print("Best params: {}".format(clf.best_params_))

# Bundle the classifier and scaler with the feature settings they were
# trained with. The settings are taken from the variables used during
# feature extraction so the saved values cannot drift from what was used.
model = {
    "cs_list": cs_list,
    "nbins": nbins,
    "bins_range": bins_range,
    "spatial_size": spatial_size,
    "hog_channels": hog_channels,
    "orient": orient,
    "pix_per_cell": pix_per_cell,
    "cell_per_block": cell_per_block,
    "hist_feat": hist_feat,
    "spatial_feat": spatial_feat,
    "hog_feat": hog_feat,
    "X_scaler": X_scaler,
    "clf": clf
}

# Store the trained model for later runs
with open(MODEL_PATH, 'wb') as pkl_file:
    pickle.dump(model, pkl_file)

# HOG Sub-sampling Window Search

In [7]:
def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """
    Return a copy of `img` with one rectangle drawn per entry of `bboxes`.

    Each box supplies its two corner points as ``bbox[0]`` and ``bbox[1]``.
    The input image is left untouched.
    """
    annotated = np.copy(img)
    for bbox in bboxes:
        first_corner, second_corner = bbox[0], bbox[1]
        cv2.rectangle(annotated, first_corner, second_corner, color, thick)
    return annotated


def find_cars(img, ystart, ystop, scale, X_scaler, clf,
              window_xy=(64, 64), cells_per_step=2,
              cs_list=['YUV', 'YUV', 'YCrCb'],
              hist_bins=32, bins_range=(0,1),
              spatial_size=(32,32),
              hog_channels=[0,1,2],
              orient=9, 
              pix_per_cell=8, 
              cell_per_block=2):
    """
    Apply the trained classifier to sliding windows and return the windows
    in which a car is detected.

    HOG features are computed once for the whole search band and sub-sampled
    per window; color features are computed per window patch.

    Args:
        img: RGB frame; it is divided by 255 before use.
        ystart, ystop: row range of the band that is searched.
        scale: factor by which the band is shrunk before searching; detected
            boxes are scaled back to frame coordinates.
        X_scaler: fitted scaler applied to each feature vector.
        clf: fitted classifier; a prediction of 1 marks a detection.
        window_xy: window size in pixels of the scaled band. It is indexed
            as (height, width) throughout this function.
        cells_per_step: window stride, in HOG cells.
        cs_list: color spaces for histogram, spatial and HOG features.
        hist_bins, bins_range: forwarded to `color_hist`.
        spatial_size: forwarded to `bin_spatial`.
        hog_channels: channel indices of the HOG color space to use.
        orient, pix_per_cell, cell_per_block: HOG parameters.

    Returns:
        List of ``((x1, y1), (x2, y2))`` boxes in frame coordinates.
    """
    heat_windows = []
    # as we trained with images containing values in (0, 1), make sure the
    # input image also contains values in (0, 1)
    img = img.astype(np.float32) / 255
    img_tosearch = img[ystart:ystop,:,:]
    
    # Color-converted band used for HOG
    hog_tosearch = convert_color(img_tosearch, cs_list[2])
    if scale != 1:
        band_rows, band_cols = hog_tosearch.shape[:2]
        scaled_size = (int(band_cols / scale), int(band_rows / scale))
        hog_tosearch = cv2.resize(hog_tosearch, scaled_size)
        # The color patches are cut out at the same (scaled) coordinates as
        # the HOG windows, so the RGB band must be scaled identically.
        img_tosearch = cv2.resize(img_tosearch, scaled_size)
    
    window_rows, window_cols = window_xy[0], window_xy[1]
    
    ## Define blocks and steps
    nxblocks = (hog_tosearch.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (hog_tosearch.shape[0] // pix_per_cell) - cell_per_block + 1
    window_x_blocks = window_cols // pix_per_cell - cell_per_block + 1
    window_y_blocks = window_rows // pix_per_cell - cell_per_block + 1
    # "+ 1" so the window touching the right / bottom edge is included.
    nxsteps = (nxblocks - window_x_blocks) // cells_per_step + 1
    nysteps = (nyblocks - window_y_blocks) // cells_per_step + 1
    
    ## Compute HOG features of each requested channel for the entire band
    hog_maps = [
        get_hog_features(hog_tosearch[:,:,c], orient, pix_per_cell,
                         cell_per_block, feature_vec=False)
        for c in hog_channels
    ]
    
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            xpos = xb*cells_per_step
            # Extract HOG for this patch, channel after channel
            hog_features = np.hstack([
                hog_map[ypos:ypos+window_y_blocks, xpos:xpos+window_x_blocks].ravel()
                for hog_map in hog_maps
            ])

            xleft = xpos*pix_per_cell
            ytop = ypos*pix_per_cell

            # Extract the image patch and bring it to 64x64 pixels
            subimg = cv2.resize(img_tosearch[ytop:ytop+window_rows, xleft:xleft+window_cols], (64,64))
          
            # Get color features
            hist_features = color_hist(subimg, color_space=cs_list[0], nbins=hist_bins, bins_range=bins_range)
            spatial_features = bin_spatial(subimg, color_space=cs_list[1], size=spatial_size)
            
            # Scale features and make a prediction
            features = X_scaler.transform(np.hstack((hist_features, spatial_features, hog_features)).reshape(1, -1))    
            prediction = clf.predict(features)
            
            if prediction[0] == 1:
                # Map the window back to frame coordinates
                xbox_left = int(xleft*scale)
                ytop_draw = int(ytop*scale)
                xwin_draw = int(window_cols*scale)
                ywin_draw = int(window_rows*scale)
                heat_windows.append(((xbox_left, ytop_draw+ystart), (xbox_left+xwin_draw, ytop_draw+ywin_draw+ystart)))
            
    return heat_windows

In [None]:
# Single-scale search (scale 1.5) on one test image, with the raw
# detection windows drawn on a copy of it.
ystart, ystop = 400, 656
scale = 1.5

demo_frame = test_imgs[4]
heat_windows = find_cars(demo_frame, ystart, ystop, scale, X_scaler, clf)
out_img = draw_boxes(demo_frame, heat_windows)

plt.figure(figsize=(10, 5))
plt.imshow(out_img)

In [None]:
# Single-scale search (scale 1) on another test image, with the raw
# detection windows drawn on a copy of it.
ystart, ystop = 400, 656
scale = 1

demo_frame = test_imgs[2]
heat_windows = find_cars(demo_frame, ystart, ystop, scale, X_scaler, clf)
out_img = draw_boxes(demo_frame, heat_windows)
plt.imshow(out_img)

# Heat Map

In [8]:
def add_heat(heatmap, heat_windows):
    """Accumulate detection windows into `heatmap`.

    Every pixel covered by a window is incremented by one. Windows are
    expected in the form ((x1, y1), (x2, y2)). The heat map is modified in
    place and also returned.
    """
    for (x1, y1), (x2, y2) in heat_windows:
        heatmap[y1:y2, x1:x2] += 1
    return heatmap

def apply_threshold(heatmap, threshold):
    """Zero every heat-map entry that does not exceed `threshold`.

    Entries less than or equal to `threshold` are set to 0. The heat map is
    modified in place and also returned.
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap

def draw_labeled_bboxes(img, labels):
    """Draw one bounding box per labeled region directly onto `img`.

    `labels` is a pair: ``labels[0]`` is the label map and ``labels[1]`` the
    number of labels. Regions are numbered from 1. The image is modified in
    place and also returned.
    """
    label_count = labels[1]
    for car_number in range(1, label_count + 1):
        # Coordinates of every pixel that carries this label
        hits = (labels[0] == car_number).nonzero()
        ys, xs = np.array(hits[0]), np.array(hits[1])
        # Tightest box around those pixels
        top_left = (np.min(xs), np.min(ys))
        bottom_right = (np.max(xs), np.max(ys))
        cv2.rectangle(img, top_left, bottom_right, (0,0,255), 6)
    return img

In [None]:
heat_demo_img = test_imgs[4]

# Detect on the very image that is displayed below; a `heat_windows` list
# left over from another cell may belong to a different image.
heat_windows = find_cars(heat_demo_img, 400, 656, 1.5, X_scaler, clf)

# One heat value per pixel (rows x cols), not per color channel.
heatmap = np.zeros(heat_demo_img.shape[:2], dtype=np.float64)
heatmap = add_heat(heatmap, heat_windows)
thresh_heatmap = apply_threshold(heatmap, 2)
labels = label(thresh_heatmap)
print(labels[1], ' cars found')

# draw_labeled_bboxes paints onto the array it is given, so hand it a copy
# and keep the shared test image unchanged.
plt.imshow(draw_labeled_bboxes(np.copy(heat_demo_img), labels))

# Pipeline

In [9]:
# Restore the classifier and feature scaler saved by the training section.
# NOTE: unpickling runs code stored in the file - only load model files you
# created yourself.
with open(MODEL_PATH, 'rb') as pkl_file:
    model = pickle.load(pkl_file)
X_scaler, clf = model["X_scaler"], model["clf"]

# Search band (rows) and the window scales searched in every frame
ystart, ystop = 400, 656
scale = [1, 1.5, 2]

# Heat-map threshold applied when labels are generated
heap_threshold = 3

# Detector state shared across frames
label_freq = 6 # Generate labels for every 6 frames
frame_count = 0
pre_heat_windows = []
pre_labels = ([], 0)

def car_detector(img):
    """Car detection pipeline for a single video frame.

    Detection windows from every scale in `scale` are accumulated in
    `pre_heat_windows` across frames. Once `label_freq` frames have been
    collected, the windows are turned into a heat map, thresholded with
    `heap_threshold` and labeled. If that yields at least one label the
    result replaces `pre_labels`. The accumulated windows and the frame
    counter are then reset. The boxes of `pre_labels` are drawn on every
    frame.

    Args:
        img: RGB frame; boxes are drawn onto it in place.

    Returns:
        The same frame object, annotated.
    """
    global frame_count
    global pre_heat_windows
    global pre_labels
    
    frame_count += 1
    
    for scl in scale:
        pre_heat_windows.extend(find_cars(img, ystart, ystop, scl, X_scaler, clf))
    
    if frame_count >= label_freq:
        # One float heat value per pixel. np.float64 is used because the
        # np.float alias is no longer provided by recent NumPy releases.
        heatmap = np.zeros(img.shape[:2], dtype=np.float64)
        heatmap = add_heat(heatmap, pre_heat_windows)
        thresh_heatmap = apply_threshold(heatmap, heap_threshold)
        labels = label(thresh_heatmap)
        
        # Keep showing the previous boxes when nothing was found this round
        if labels[1] > 0:
            pre_labels = labels
        
        # clean up previous heat windows and frame count
        del pre_heat_windows[:]
        frame_count = 0

    draw_labeled_bboxes(img, pre_labels)
    
    return img

In [10]:
test_output = './test.mp4'
clip = VideoFileClip(TEST_VIDEO_PATH)
test_clip = clip.fl_image(car_detector)
%time test_clip.write_videofile(test_output, audio=False)

[MoviePy] >>>> Building video ./test.mp4
[MoviePy] Writing video ./test.mp4


 97%|████████████████████████████████████████████████████████████████████████████▉  | 38/39 [00:41<00:01,  1.10s/it]


[MoviePy] Done.
[MoviePy] >>>> Video ready: ./test.mp4 

Wall time: 41.9 s


In [None]:
# Embed the rendered test video in the notebook output.
video_markup = """
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(test_output)
HTML(video_markup)

In [12]:
pre_heat_windows = []
pre_labels = ([], 0)
project_output = './project_result.mp4'
clip = VideoFileClip(PROJECT_VIDEO_PATH)
project_clip = clip.fl_image(car_detector)
%time project_clip.write_videofile(project_output, audio=False)

[MoviePy] >>>> Building video ./project_result.mp4
[MoviePy] Writing video ./project_result.mp4


100%|██████████████████████████████████████████████████████████████████████████▉| 1260/1261 [22:17<00:01,  1.06s/it]


[MoviePy] Done.
[MoviePy] >>>> Video ready: ./project_result.mp4 

Wall time: 22min 18s


In [None]:
# Embed the rendered project video in the notebook output.
video_markup = """
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(project_output)
HTML(video_markup)