In [1]:
### IMPORTS
import os
import glob
import time

import cv2
import numpy as np
from skimage.feature import hog
from scipy.ndimage.measurements import label

from sklearn.utils import shuffle
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from moviepy.editor import VideoFileClip

%matplotlib inline

In [2]:
### FUNCTIONS
def color_hist(img, nbins=32, bins_range=(0,256)):
    """Return the concatenated per-channel histograms of a 3-channel image.

    Parameters
    ----------
    img : array indexed as img[:, :, channel]; channels 0, 1 and 2 are used.
    nbins : number of histogram bins per channel.
    bins_range : (low, high) value range handed to ``np.histogram``.

    Returns
    -------
    1-D array of length ``3 * nbins`` holding the bin counts of channel 0,
    then channel 1, then channel 2.
    """
    # Only the counts (element 0 of np.histogram's result) are features;
    # the bin edges are not needed.
    channel_counts = [
        np.histogram(img[:, :, channel], bins=nbins, range=bins_range)[0]
        for channel in range(3)
    ]
    return np.concatenate(channel_counts)


def bin_spatial(img, color_space='RGB', size=(32, 32)):
    """Down-sample an image and flatten it into a spatial feature vector.

    Parameters
    ----------
    img : image array; the conversion codes used below are the
        ``cv2.COLOR_RGB2*`` family, so the input is treated as RGB.
    color_space : one of 'RGB', 'HSV', 'LUV', 'HLS', 'YUV', 'YCrCb'.
        The image is converted to this space before resizing.
    size : target size passed to ``cv2.resize``.

    Returns
    -------
    1-D array: the resized image, flattened with ``ravel()``.

    Raises
    ------
    ValueError
        If ``color_space`` is not one of the supported names.
    """
    conversions = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb,
    }
    if color_space == 'RGB':
        feature_image = np.copy(img)
    elif color_space in conversions:
        feature_image = cv2.cvtColor(img, conversions[color_space])
    else:
        raise ValueError('Unsupported color_space: {!r}'.format(color_space))
    # Resize the *converted* image. Previously the raw input was resized,
    # so the color_space argument had no effect on the result.
    return cv2.resize(feature_image, size).ravel()


def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    """Compute HOG features for a single-channel image with ``skimage.feature.hog``.

    Parameters
    ----------
    img : 2-D image (one channel).
    orient : number of orientation bins.
    pix_per_cell : cell side length in pixels (square cells).
    cell_per_block : block side length in cells (square blocks).
    vis : when true, ``hog`` is asked to also return a visualisation image.
    feature_vec : forwarded as ``feature_vector``.

    Returns
    -------
    ``features`` when ``vis`` is false, otherwise the pair
    ``(features, hog_image)`` as returned by ``hog``.
    """
    # Every option is passed by keyword. In current scikit-image the fifth
    # positional parameter of hog() is `block_norm`, so passing `vis`
    # positionally would be interpreted as the normalisation method.
    # NOTE: `visualize` is the spelling used by current scikit-image
    # releases; very old releases spelled the keyword `visualise`.
    return hog(img,
               orientations=orient,
               pixels_per_cell=(pix_per_cell, pix_per_cell),
               cells_per_block=(cell_per_block, cell_per_block),
               visualize=vis,
               feature_vector=feature_vec)
    
    
def single_img_features(img, color_space='RGB', spatial_size=(32, 32),
                        hist_bins=32, orient=9, 
                        pix_per_cell=8, cell_per_block=2, hog_channel='ALL'):    
    """Build the full feature vector (spatial + color histogram + HOG) of one image.

    Parameters
    ----------
    img : image array, treated as RGB (``cv2.COLOR_RGB2*`` conversions).
    color_space : 'RGB', 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'; used for the
        histogram and HOG parts.
    spatial_size : size forwarded to ``bin_spatial``.
    hist_bins : bins per channel forwarded to ``color_hist``.
    orient, pix_per_cell, cell_per_block : forwarded to ``get_hog_features``.
    hog_channel : channel index, or 'ALL' to concatenate HOG of every channel.

    Returns
    -------
    1-D array: spatial features, then histogram features, then HOG features.

    Raises
    ------
    ValueError
        If ``color_space`` is not supported (previously this surfaced later
        as an UnboundLocalError on ``feature_image``).
    """
    conversions = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb,
    }
    if color_space == 'RGB':
        feature_image = np.copy(img)
    elif color_space in conversions:
        feature_image = cv2.cvtColor(img, conversions[color_space])
    else:
        raise ValueError('Unsupported color_space: {!r}'.format(color_space))

    # NOTE(review): the spatial bins are taken from the original `img`, not
    # from the converted image. Kept as-is so the feature layout is unchanged;
    # confirm whether this is intended.
    spatial_features = bin_spatial(img, size=spatial_size)

    hist_features = color_hist(feature_image, nbins=hist_bins)

    if hog_channel == 'ALL':
        hog_features = np.concatenate([
            np.ravel(get_hog_features(feature_image[:, :, channel],
                                      orient, pix_per_cell, cell_per_block,
                                      vis=False, feature_vec=True))
            for channel in range(feature_image.shape[2])
        ])
    else:
        hog_features = get_hog_features(feature_image[:, :, hog_channel], orient,
                                        pix_per_cell, cell_per_block,
                                        vis=False, feature_vec=True)

    return np.concatenate((spatial_features, hist_features, hog_features))


def extract_features(imgs, cspace='RGB', orient=9, pix_per_cell=8, cell_per_block=2, hog_channel='ALL',
                     vis=False, feature_vec=True, spatial_size=(32, 32), hist_bins=32, hist_range=(0, 256)):
    """Compute one feature row per image and stack the rows into a 2-D array.

    Each image is passed to ``single_img_features`` with ``cspace`` as its
    color space. ``vis``, ``feature_vec`` and ``hist_range`` are accepted
    but not used by this function.
    """
    per_image_kwargs = dict(color_space=cspace, spatial_size=spatial_size,
                            hist_bins=hist_bins, orient=orient,
                            pix_per_cell=pix_per_cell, cell_per_block=cell_per_block,
                            hog_channel=hog_channel)
    rows = [single_img_features(image, **per_image_kwargs) for image in imgs]
    return np.vstack(rows)


def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None), 
                 xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """List the sliding-window boxes covering a region of an image.

    Parameters
    ----------
    img : array whose ``shape[0]`` / ``shape[1]`` are the image height / width.
    x_start_stop, y_start_stop : ``[start, stop]`` pixel limits of the search
        region; a ``None`` entry means the image edge (0 or the image size).
        The sequences are only read, never modified.
    xy_window : (width, height) of each window in pixels.
    xy_overlap : (x, y) overlap fraction between neighbouring windows.

    Returns
    -------
    List of ``((startx, starty), (endx, endy))`` tuples, row by row.
    """
    # Resolve the limits into locals. Writing the resolved values back into
    # the arguments would mutate the caller's lists and, for the defaults,
    # freeze them at the first image's size for every later call.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Built-in int() replaces the np.int alias, which was removed from NumPy.
    nx_pix_per_step = int(xy_window[0]*(1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1]*(1 - xy_overlap[1]))

    # NOTE(review): this count is kept exactly as before. With overlaps other
    # than 0.5, or spans shorter than the window, the generated boxes can
    # extend past the stop limit; callers may rely on that.
    nx_windows = int(xspan/nx_pix_per_step) - 1
    ny_windows = int(yspan/ny_pix_per_step) - 1

    window_list = []
    for ys in range(ny_windows):
        starty = ys*ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs*nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list


def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """Return a copy of ``img`` with one rectangle drawn per entry of ``bboxes``.

    Each box is indexed as ``box[0]`` and ``box[1]``, the two corner points
    handed to ``cv2.rectangle``. The input image is left untouched.
    """
    canvas = np.copy(img)
    for box in bboxes:
        corner_a, corner_b = box[0], box[1]
        cv2.rectangle(canvas, corner_a, corner_b, color, thick)
    return canvas


def search_windows(img, windows, clf, scaler, color_space='RGB', 
                   spatial_size=(32, 32), hist_bins=32, 
                   hist_range=(0, 256), orient=9, 
                   pix_per_cell=8, cell_per_block=2, 
                   hog_channel='ALL'):
    """Return the windows whose image patch the classifier predicts as 1.

    For every ``((x0, y0), (x1, y1))`` window, the patch is cut out of ``img``,
    resized to 64x64, turned into a feature vector with
    ``single_img_features``, scaled with ``scaler`` and classified with
    ``clf``. ``hist_range`` is accepted but not used.
    """
    feature_kwargs = dict(color_space=color_space,
                          spatial_size=spatial_size, hist_bins=hist_bins,
                          orient=orient, pix_per_cell=pix_per_cell,
                          cell_per_block=cell_per_block,
                          hog_channel=hog_channel)
    hits = []
    for window in windows:
        top_left, bottom_right = window[0], window[1]
        patch = img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
        patch_64 = cv2.resize(patch, (64, 64))
        raw_features = single_img_features(patch_64, **feature_kwargs)
        # The scaler expects a 2-D (samples, features) array: one sample here.
        scaled = scaler.transform(np.array(raw_features).reshape(1, -1))
        prediction = clf.predict(scaled)
        if prediction == 1:
            hits.append(window)
    return hits


def add_heat(heatmap, bbox_list):
    """Add 1 to ``heatmap`` inside every box of ``bbox_list`` (in place).

    Boxes are ``((x0, y0), (x1, y1))``; rows ``y0:y1`` and columns ``x0:x1``
    are incremented. The same ``heatmap`` object is returned.
    """
    for corners in bbox_list:
        top_left, bottom_right = corners[0], corners[1]
        rows = slice(top_left[1], bottom_right[1])
        cols = slice(top_left[0], bottom_right[0])
        heatmap[rows, cols] += 1
    return heatmap


def apply_threshold(heatmap, threshold):
    """Zero every heatmap cell whose value is <= ``threshold`` (in place).

    The same ``heatmap`` object is returned.
    """
    at_or_below = heatmap <= threshold
    heatmap[at_or_below] = 0
    return heatmap


def draw_labeled_bboxes(img, labels):
    """Draw one bounding rectangle per labelled region onto ``img`` (in place).

    Parameters
    ----------
    img : image array that ``cv2.rectangle`` draws on.
    labels : pair ``(label_map, count)``; regions are numbered 1..count in
        ``label_map`` (the layout produced by the ``label`` call in the
        pipeline).

    Returns
    -------
    The same ``img`` object, with a blue-channel (0, 0, 255) rectangle of
    thickness 6 around the extent of every region.
    """
    label_map, n_regions = labels[0], labels[1]
    for car_number in range(1, n_regions + 1):
        nonzero = (label_map == car_number).nonzero()
        nonzeroy = nonzero[0]
        nonzerox = nonzero[1]
        # Cast to built-in int: the min/max results are NumPy scalars, which
        # some OpenCV builds refuse inside point tuples.
        top_left = (int(nonzerox.min()), int(nonzeroy.min()))
        bottom_right = (int(nonzerox.max()), int(nonzeroy.max()))
        cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 6)
    return img

In [3]:
### PARAMETERS
# Feature-extraction settings shared by training (extract_features) and
# detection (search_windows); both must use the same values.

# Color space the image is converted to before the histogram and HOG steps.
colorspace = 'HSV'
# HOG settings, forwarded to get_hog_features: number of orientation bins,
# cell side in pixels, block side in cells.
orient = 9
pix_per_cell = 8
cell_per_block = 2
# "ALL" concatenates the HOG features of every channel; an integer would
# select a single channel.
hog_channel = "ALL"
# Side length of the down-sampled image; used as spatial_size=(spatial, spatial).
spatial = 16
# Number of bins per channel in the color histogram.
histbins = 32

In [4]:
### DATA PREPARATION
# Seed for shuffling and splitting, so the reported score is reproducible.
RANDOM_SEED = 0

gti_far_vehicles_SET_PATH         = 'datasets/full/vehicles/GTI_Far/*'
gti_left_vehicles_SET_PATH        = 'datasets/full/vehicles/GTI_Left/*'
gti_right_vehicles_SET_PATH       = 'datasets/full/vehicles/GTI_Right/*'
gti_middleclose_vehicles_SET_PATH = 'datasets/full/vehicles/GTI_MiddleClose/*'
kitti_vehicles_SET_PATH           = 'datasets/full/vehicles/KITTI_extracted/*'
extras_non_vehicles_SET_PATH      = 'datasets/full/non-vehicles/Extras/*'
gti_non_vehicles_SET_PATH         = 'datasets/full/non-vehicles/GTI/*'

# glob returns files in an arbitrary order; sorting makes the sample order
# (and therefore the seeded shuffle/split) identical on every run.
gti_far_vehicles_paths            = sorted(glob.glob(gti_far_vehicles_SET_PATH))
gti_left_vehicles_paths           = sorted(glob.glob(gti_left_vehicles_SET_PATH))
gti_right_vehicles_paths          = sorted(glob.glob(gti_right_vehicles_SET_PATH))
gti_middleclose_vehicles_paths    = sorted(glob.glob(gti_middleclose_vehicles_SET_PATH))
kitti_vehicles_paths              = sorted(glob.glob(kitti_vehicles_SET_PATH))
extras_non_vehicles_paths         = sorted(glob.glob(extras_non_vehicles_SET_PATH))
gti_non_vehicles_paths            = sorted(glob.glob(gti_non_vehicles_SET_PATH))

gti_far_vehicles_images           = np.array([mpimg.imread(path) for path in gti_far_vehicles_paths])
gti_left_vehicles_images          = np.array([mpimg.imread(path) for path in gti_left_vehicles_paths])
gti_right_vehicles_images         = np.array([mpimg.imread(path) for path in gti_right_vehicles_paths])
gti_middleclose_vehicles_images   = np.array([mpimg.imread(path) for path in gti_middleclose_vehicles_paths])
kitti_vehicles_images             = np.array([mpimg.imread(path) for path in kitti_vehicles_paths])
extras_non_vehicles_images        = np.array([mpimg.imread(path) for path in extras_non_vehicles_paths])
gti_non_vehicles_images           = np.array([mpimg.imread(path) for path in gti_non_vehicles_paths])

X_vehicles_images = np.concatenate([gti_far_vehicles_images,
                                    gti_left_vehicles_images,
                                    gti_right_vehicles_images,
                                    gti_middleclose_vehicles_images,
                                    kitti_vehicles_images], axis=0)
X_non_vehicles_images = np.concatenate([extras_non_vehicles_images, gti_non_vehicles_images], axis=0)
X_images = np.concatenate([X_vehicles_images, X_non_vehicles_images], axis=0)
# Rescale to 0..255 uint8 only when the loaded images are floating point.
# Assumes the dataset files are PNGs, which mpimg.imread returns as floats
# in [0, 1] — TODO confirm. Integer images are left alone: multiplying
# uint8 data by 255 would wrap around.
if np.issubdtype(X_images.dtype, np.floating):
    X_images = (X_images*255).astype(np.uint8)

# Labels: 1 for vehicles, 0 for non-vehicles, in the same order as X_images.
n_ones = X_vehicles_images.shape[0]
n_zeros = X_non_vehicles_images.shape[0]
y = np.hstack((np.ones(n_ones), np.zeros(n_zeros)))

X = extract_features(X_images, cspace=colorspace, orient=orient, pix_per_cell=pix_per_cell,
                     cell_per_block=cell_per_block, hog_channel=hog_channel, 
                     spatial_size=(spatial, spatial), hist_bins=histbins, hist_range=(0,256))

X, y = shuffle(X, y, random_state=RANDOM_SEED)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=RANDOM_SEED)

# Fit the scaler on the training split only, so no statistics of the test
# samples leak into preprocessing. X itself stays unscaled; X_train and
# X_test are the scaled arrays.
X_scaler = StandardScaler().fit(X_train)
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

In [5]:
### CLASSIFIER
# Train a linear SVM on the training split and report its mean accuracy
# on the held-out test split.
clf = LinearSVC(C=1).fit(X_train, y_train)
score = clf.score(X_test, y_test)

print(score)

0.996058558559


In [6]:
### SLIDING WINDOWS SETUP AND VISUALISATION
img_path = 'CarND-Vehicle-Detection/test_images/test3.jpg'
typical_img = mpimg.imread(img_path)

# One row per search scale: (window side in px, [y start, y stop], overlap).
# Every scale searches x from 500 to the right image edge.
window_scales = [
    (64,  [400, 500], 0.5),
    (96,  [410, 550], 0.66),
    (120, [380, 490], 0.75),
    (140, [400, 514], 0.75),
    (160, [450, 600], 0.75),
]

(window_list_64,
 window_list_96,
 window_list_120,
 window_list_140,
 window_list_160) = [
    slide_window(typical_img, x_start_stop=[500, None], y_start_stop=y_range,
                 xy_window=(side, side), xy_overlap=(overlap, overlap))
    for side, y_range, overlap in window_scales
]
# Uncomment to visualise windows
# out_img = np.copy(typical_img)
# out_img = draw_boxes(out_img, window_list_160, color=[0,255,0], thick=5)
# plt.figure(figsize=(12,120))
# plt.imshow(out_img);

In [7]:
### PIPELINE
# Heatmaps of the most recent frames; None until the first frame is processed.
last_heats = None
# Number of previous frames whose heat is added to the current frame's heat.
HEAT_HISTORY_FRAMES = 4
# Heat values at or below this are discarded before labelling.
HEAT_THRESHOLD = 2


def process_image(img):
    """Detect vehicles in one video frame and return an annotated copy.

    Every precomputed window list is searched with the trained classifier,
    the positive windows are accumulated into a heatmap, the heatmap is
    summed with the heat of the previous frames, thresholded, labelled and
    the labelled regions are drawn as boxes.

    Reads and updates the module-level ``last_heats`` history, so frames
    must be fed in order and the history reset (``last_heats = None``)
    between unrelated clips.

    Parameters
    ----------
    img : frame indexed as img[row, col, channel].

    Returns
    -------
    A copy of ``img`` with the detected bounding boxes drawn on it.
    """
    global last_heats

    search_kwargs = dict(color_space=colorspace,
                         spatial_size=(spatial, spatial), hist_bins=histbins, orient=orient,
                         pix_per_cell=pix_per_cell, cell_per_block=cell_per_block,
                         hog_channel=hog_channel)
    all_good_windows = []
    for window_list in (window_list_64, window_list_96, window_list_120,
                        window_list_140, window_list_160):
        all_good_windows.extend(
            search_windows(img, window_list, clf, X_scaler, **search_kwargs))

    # Built-in float replaces the np.float alias, which was removed from NumPy.
    pred_heat = add_heat(np.zeros(img.shape[:2], dtype=float), all_good_windows)

    if last_heats is None:
        # First frame: no history yet, so seed it with the current heat and
        # threshold the current heat alone. Work on a copy: apply_threshold
        # modifies its argument in place and must not alter the array that
        # is stored in the history.
        last_heats = [pred_heat] * HEAT_HISTORY_FRAMES
        heat = pred_heat.copy()
    else:
        heat = pred_heat + np.sum(last_heats, axis=0)

    # Slide the history window forward by one frame.
    del last_heats[0]
    last_heats.append(pred_heat)

    heat = apply_threshold(heat, HEAT_THRESHOLD)
    heatmap = np.clip(heat, 0, 255)
    labels = label(heatmap)

    final_img = draw_labeled_bboxes(np.copy(img), labels)
    return final_img

In [8]:
### TEST VIDEO
# output = 'processed_test_video.mp4'
# clip = VideoFileClip('CarND-Vehicle-Detection/test_video.mp4')
# processed_clip = clip.fl_image(process_image)
# %time processed_clip.write_videofile(output, audio=False)

In [9]:
### PROJECT VIDEO
output = 'processed_project_video.mp4'
clip = VideoFileClip('CarND-Vehicle-Detection/project_video.mp4')
processed_clip = clip.fl_image(process_image)
%time processed_clip.write_videofile(output, audio=False)

[MoviePy] >>>> Building video processed_project_video.mp4
[MoviePy] Writing video processed_project_video.mp4


100%|█████████▉| 1260/1261 [30:34<00:01,  1.37s/it]


[MoviePy] Done.
[MoviePy] >>>> Video ready: processed_project_video.mp4 

CPU times: user 1h 37min 59s, sys: 4min 39s, total: 1h 42min 38s
Wall time: 30min 35s
