# Udacity Nanodegree Program "Self Driving Car Engineer"
## Vehicle Detection Project

The goals / steps of this project are the following:

* Perform a Histogram of Oriented Gradients (HOG) feature extraction on a labeled training set of images and train a Linear SVM classifier
* Optionally, you can also apply a color transform and append binned color features, as well as histograms of color, to your HOG feature vector. 
* Note: for those first two steps don't forget to normalize your features and randomize a selection for training and testing.
* Implement a sliding-window technique and use your trained classifier to search for vehicles in images.
* Run your pipeline on a video stream (start with the test_video.mp4 and later implement on full project_video.mp4) and create a heat map of recurring detections frame by frame to reject outliers and follow detected vehicles.
* Estimate a bounding box for vehicles detected.



#### Import required modules

In [None]:
from skimage.feature import hog
from sklearn import svm, grid_search
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.ndimage.measurements import label
import cv2
import glob
import imp
import matplotlib.image as mpimg
import matplotlib.pyplot as plt 
from moviepy.editor import VideoFileClip
import numpy as np
import os
import pickle
import random
import time
%matplotlib inline


#### Find Training Images

In [None]:
# Collect the training samples of both classes; each pattern matches every
# entry two levels below the class directory.
car_img_names = glob.glob("training_data/cars/*/*")
notcar_img_names = glob.glob("training_data/notcars/*/*")
num_cars, num_notcars = len(car_img_names), len(notcar_img_names)
print("training images - cars:", num_cars, "   notcars:", num_notcars)

#### Define functions to convert the colorspace of an image and to get the HOG features
This code was taken from the lesson materials

In [None]:
def convert_color(img, conv='RGB2YCrCb'):
    """Convert ``img`` to another color space with OpenCV.

    Args:
        img: Image array that is handed to ``cv2.cvtColor`` unchanged.
        conv: Name of the conversion in OpenCV notation without the
            ``COLOR_`` prefix, e.g. ``'RGB2YCrCb'``, ``'BGR2YCrCb'``,
            ``'RGB2LUV'`` or ``'RGB2HSV'``.

    Returns:
        The converted image, or ``None`` (after printing an error message)
        when OpenCV offers no conversion code with that name.
    """
    # Resolve the conversion code by name rather than hard-coding a few
    # cases, so every color space the rest of the notebook can be configured
    # with is handled here as well.
    code = getattr(cv2, "COLOR_" + conv, None)
    if code is None:
        print("Error: Color space {:s} is not supported!".format(conv))
        return None
    return cv2.cvtColor(img, code)

    
def get_hog_features(img, orient, pix_per_cell, cell_per_block,
                     vis=False, feature_vec=True):
    """Compute the HOG descriptor of ``img`` with ``skimage.feature.hog``.

    Args:
        img: Image to describe; the callers in this notebook pass one color
            channel at a time.
        orient: Number of orientation bins.
        pix_per_cell: Edge length of a (square) cell in pixels.
        cell_per_block: Edge length of a (square) block in cells.
        vis: When ``True`` the HOG visualisation image is returned as well.
        feature_vec: Forwarded as ``feature_vector``; ``False`` keeps the
            block structure instead of flattening the result.

    Returns:
        ``(features, hog_image)`` if ``vis`` is ``True``, otherwise only the
        features.
    """
    # NOTE(review): newer scikit-image releases spell the keyword
    # ``visualize`` - confirm against the installed version.
    result = hog(img,
                 orientations=orient,
                 pixels_per_cell=(pix_per_cell,) * 2,
                 cells_per_block=(cell_per_block,) * 2,
                 transform_sqrt=False,
                 visualise=vis,
                 feature_vector=feature_vec)
    if vis == True:
        # Two outputs were requested: descriptor and visualisation image
        features, hog_image = result
        return features, hog_image
    return result
    

def bin_spatial(img, size=(32, 32)):
    """Return the down-sampled pixel values of ``img`` as one flat vector.

    Each of the first three channels is resized to ``size`` separately and
    flattened; the three vectors are joined in channel order.
    """
    channels = [cv2.resize(img[:, :, idx], size).ravel() for idx in range(3)]
    return np.hstack(channels)
                        
    
def color_hist(img, nbins=32, bins_range=None):
    """Return the concatenated per-channel histograms of ``img``.

    Args:
        img: Array indexed as ``img[:, :, channel]`` with at least three
            channels; only the first three are used.
        nbins: Number of histogram bins per channel.
        bins_range: Optional ``(low, high)`` value range shared by all
            histograms. With the default ``None`` every channel is binned
            between its own minimum and maximum (the original behavior),
            which makes the bin edges depend on the image content; pass a
            fixed range to obtain features that are comparable across images.

    Returns:
        1-D array of ``3 * nbins`` bin counts (channel 0, 1, 2).
    """
    # np.histogram returns (counts, bin_edges); only the counts are features
    channel_counts = [
        np.histogram(img[:, :, ch], bins=nbins, range=bins_range)[0]
        for ch in range(3)
    ]
    return np.concatenate(channel_counts)

#### Define a function to extract the HOG features from an array of car images and non-car images
This code was taken from the lesson materials

In [None]:
# Define a function to extract features from a list of images
def extract_features(imgs, color_space='RGB', spatial_size=(32, 32),
                        hist_bins=32, orient=9,
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Extract one feature vector per image file.

    Args:
        imgs: Iterable of image file names, read with ``mpimg.imread``.
        color_space: ``'RGB'`` (no conversion), ``'HSV'``, ``'LUV'``,
            ``'HLS'``, ``'YUV'`` or ``'YCrCb'``.
        spatial_size: Target size for the spatially binned color features.
        hist_bins: Number of bins per channel of the color histograms.
        orient, pix_per_cell, cell_per_block: HOG parameters.
        hog_channel: Channel index used for HOG, or ``'ALL'``.
        spatial_feat, hist_feat, hog_feat: Switches for the feature groups.

    Returns:
        List of 1-D feature vectors. Within a vector the groups appear in
        the order HOG, spatial bins, color histogram.

    Raises:
        ValueError: If ``color_space`` is not one of the supported names.
    """
    rgb_conversions = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb,
    }
    # Fail early: an unknown name used to leave feature_image unassigned,
    # i.e. a NameError for the first file and a silently reused previous
    # image for every following one.
    if color_space != 'RGB' and color_space not in rgb_conversions:
        raise ValueError(
            "Unsupported color space {!r}; expected 'RGB' or one of {}".format(
                color_space, sorted(rgb_conversions)))

    features = []
    for file in imgs:
        # NOTE(review): the value range returned by mpimg.imread presumably
        # depends on the file format - confirm it matches the scaling that is
        # applied to frames at detection time.
        image = mpimg.imread(file)
        if color_space == 'RGB':
            feature_image = np.copy(image)
        else:
            feature_image = cv2.cvtColor(image, rgb_conversions[color_space])

        file_features = []

        if hog_feat:
            if hog_channel == 'ALL':
                # One descriptor per channel, flattened into a single vector
                hog_features = np.ravel([
                    get_hog_features(feature_image[:, :, channel],
                                     orient, pix_per_cell, cell_per_block,
                                     vis=False, feature_vec=True)
                    for channel in range(feature_image.shape[2])
                ])
            else:
                hog_features = get_hog_features(
                    feature_image[:, :, hog_channel], orient,
                    pix_per_cell, cell_per_block, vis=False, feature_vec=True)
            file_features.append(hog_features)

        if spatial_feat:
            file_features.append(bin_spatial(feature_image, size=spatial_size))

        if hist_feat:
            file_features.append(color_hist(feature_image, nbins=hist_bins))

        features.append(np.concatenate(file_features))
    return features


#### Extract the features from the training images and combine them with the appropriate labels

In [None]:
# Feature extraction
# --- Feature extraction settings ---
# COLORSPACE is one out of "RGB", "HSV", "LUV", "HLS", "YUV", "YCrCb"
COLORSPACE = "YCrCb"
# HOG extraction parameters
ORIENTATION = 9
PIX_PER_CELL = 8
CELL_PER_BLOCK = 2
# HOG_CHANNEL is one out of 0, 1, 2, "ALL"
HOG_CHANNEL = "ALL"
# Switches for the color histogram and the spatially binned color features
HIST_FEAT = False
SPATIAL_FEAT = False

print("HOG parameters: orient:", ORIENTATION, " pix_per_cell:", PIX_PER_CELL, " cell_per_block:", CELL_PER_BLOCK, " channel:", HOG_CHANNEL)

# Both classes are processed with exactly the same settings
extract_kwargs = dict(color_space=COLORSPACE,
                      orient=ORIENTATION,
                      pix_per_cell=PIX_PER_CELL,
                      cell_per_block=CELL_PER_BLOCK,
                      hog_channel=HOG_CHANNEL,
                      hist_feat=HIST_FEAT,
                      spatial_feat=SPATIAL_FEAT)

t0 = time.time()
car_features = extract_features(car_img_names, **extract_kwargs)
notcar_features = extract_features(notcar_img_names, **extract_kwargs)
print("Seconds to extract HOG features: {:.3f}".format(time.time()-t0))

# One row per image: cars first, then non-cars
X = np.vstack([car_features, notcar_features])

# Scaling is not needed if only HOGs are used, so a scaler is fitted only
# when color features are part of the vector
X_scaler = StandardScaler().fit(X) if (HIST_FEAT or SPATIAL_FEAT) else None
scaled_X = X if X_scaler is None else X_scaler.transform(X)

# Labels in the same order as the rows of X: 1 = car, 0 = not a car
labels = np.concatenate((np.ones(len(car_img_names)),
                         np.zeros(len(notcar_img_names))))

print("X.shape:", X.shape)

#### Shuffle and split the data and train the classifier

In [None]:
# Shuffle and split the samples. The split uses scaled_X (not the raw X) so
# that the classifier is trained on features in the same scale that
# X_scaler.transform() produces at detection time; when no scaler is in use
# scaled_X is X itself.
X_train, X_test, y_train, y_test = train_test_split(scaled_X, labels, test_size=0.2,
                                                    random_state=np.random.randint(0, 100))

# Use a linear SVC as classifier
clf = svm.LinearSVC()

t0 = time.time()
clf.fit(X_train, y_train)
print("Seconds to train the classifier: {:.3f}".format(time.time()-t0))

# Check the accuracy of the classifier on the held-out samples
acc = accuracy_score(y_test, clf.predict(X_test))
print("Accuracy of the classifier: {:.3f}%".format(acc*100))


#### Define a function to detect the cars
This code was partly taken from the lesson materials

In [None]:
# Define a single function that can extract features using hog sub-sampling and make predictions
def find_cars(img, ystart, ystop, scale, svc, X_scaler, color_space, orient, pix_per_cell, cell_per_block, spatial_size=(32, 32), hist_bins=32, show_boxes=False):
    """Slide a 64x64 window over a horizontal strip of ``img`` and classify it.

    HOG is computed once per channel for the whole (rescaled) strip and then
    sub-sampled per window. Whether color histogram / spatial features are
    added is controlled by the module-level switches ``HIST_FEAT`` and
    ``SPATIAL_FEAT``.

    Args:
        img: RGB image; it is divided by 255 before the color conversion, so
            presumably 8-bit data is expected.
        ystart, ystop: Row range of the strip that is searched.
        scale: The strip is shrunk by this factor, which makes the effective
            window ``64 * scale`` pixels wide in the original image.
        svc: Trained classifier offering ``predict``.
        X_scaler: Fitted feature scaler, or ``None`` if features are unscaled.
        color_space: Target color space; ``"RGB2" + color_space`` is passed
            to ``convert_color``.
        orient, pix_per_cell, cell_per_block: HOG parameters.
        spatial_size: Size for the spatially binned color features.
        hist_bins: Bins per channel of the color histogram.
        show_boxes: If ``True`` every searched window is drawn and the
            annotated image is returned as well.

    Returns:
        Array with one ``[x_left, y_top, size]`` row per window classified
        as car (an empty array if there is none); with ``show_boxes`` a
        tuple ``(boxes, draw_img)``.

    Raises:
        ValueError: If ``convert_color`` cannot handle ``color_space``.
    """
    if show_boxes:
        colors = [(0,0,255), (0,255,255), (0,255,0), (255,255,0), (255,0,0), (255,0,255), (255,255,255)]
        draw_img = np.copy(img)

    img = img.astype(np.float32) / 255

    ctrans_tosearch = convert_color(img[ystart:ystop, :, :], conv="RGB2" + color_space)
    if ctrans_tosearch is None:
        # convert_color reports unknown conversions by returning None
        raise ValueError("Unsupported color space: {}".format(color_space))
    if scale != 1:
        height, width = ctrans_tosearch.shape[:2]
        # int() replaces np.int, which newer NumPy releases no longer provide
        ctrans_tosearch = cv2.resize(ctrans_tosearch, (int(width / scale), int(height / scale)))

    ch1 = ctrans_tosearch[:, :, 0]
    ch2 = ctrans_tosearch[:, :, 1]
    ch3 = ctrans_tosearch[:, :, 2]

    # Number of HOG block positions in the strip
    nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1

    # 64 was the original sampling rate, with 8 cells and 8 pix per cell
    window = 64
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step

    # Compute individual channel HOG features for the entire strip
    hogs = [get_hog_features(ch, orient, pix_per_cell, cell_per_block, feature_vec=False)
            for ch in (ch1, ch2, ch3)]

    use_color_features = HIST_FEAT or SPATIAL_FEAT
    win_size = int(window * scale)

    boxes = []
    for xb in range(nxsteps + 1):
        for yb in range(nysteps + 1):
            ypos = yb * cells_per_step
            xpos = xb * cells_per_step
            xleft = xpos * pix_per_cell
            ytop = ypos * pix_per_cell

            # The feature groups are stacked in the order used for training
            # in extract_features(): HOG, spatial bins, color histogram.
            feature_parts = [
                h[ypos:ypos + nblocks_per_window, xpos:xpos + nblocks_per_window].ravel()
                for h in hogs
            ]
            if use_color_features:
                subimg = cv2.resize(ctrans_tosearch[ytop:ytop + window, xleft:xleft + window], (64, 64))
                if SPATIAL_FEAT:
                    feature_parts.append(bin_spatial(subimg, size=spatial_size))
                if HIST_FEAT:
                    feature_parts.append(color_hist(subimg, nbins=hist_bins))

            # predict() and transform() expect a 2-D (n_samples, n_features) array
            test_features = np.hstack(feature_parts).reshape(1, -1)
            if X_scaler is not None:
                test_features = X_scaler.transform(test_features)
            test_prediction = svc.predict(test_features)

            # Map the window back to coordinates of the unscaled input image
            xbox_left = int(xleft * scale)
            ybox_top = int(ytop * scale) + ystart

            if show_boxes:
                # One color per window row; wrap around for tall strips
                cv2.rectangle(draw_img, (xbox_left, ybox_top),
                              (xbox_left + win_size, ybox_top + win_size),
                              colors[yb % len(colors)], 3)

            if test_prediction[0] == 1:
                boxes.append([xbox_left, ybox_top, win_size])

    if show_boxes:
        return np.array(boxes), draw_img
    return np.array(boxes)


#### Load a test image and find the cars

In [None]:
def apply_threshold(heatmap, value):
    """Zero every entry of ``heatmap`` that is below ``value``.

    The array is modified in place and returned for convenience.
    """
    too_cold = heatmap < value
    heatmap[too_cold] = 0
    return heatmap

# Heat map smoothed over successive calls of process_img(); created lazily
heatmap_smooth = None


def process_img(img):
    """Detect cars in one image and update the temporally smoothed heat map.

    The image is searched at several window scales with ``find_cars``. All
    hits add heat, the module-level ``heatmap_smooth`` is decayed by one half
    and the new heat is added; connected regions of the smoothed map with a
    value of at least 2 are reported as cars.

    Args:
        img: RGB image / video frame.

    Returns:
        Tuple ``(cars, heatmap_smooth, img_out)``: ``cars`` is a list of
        ``(center_x, center_y, width, height)`` tuples, ``heatmap_smooth``
        the updated (unthresholded) smoothed heat map and ``img_out`` a copy
        of ``img`` with every raw hit drawn.
    """
    global heatmap_smooth
    img_out = np.copy(img)
    heatmap = np.zeros_like(img[:, :, 0])
    # Allow calling the function before anyone has created the smoothed map
    # (or after the frame size changed) instead of failing on None.
    if heatmap_smooth is None or heatmap_smooth.shape != heatmap.shape:
        heatmap_smooth = np.zeros_like(heatmap)

    # Search strip (ystart, ystop) for each window scale
    scale_dim = {1.0: (400, 492),
                 1.5: (400, 528),
                 2.0: (400, 560),
                 2.5: (400, 600),
                 3.0: (400, 592),
                }

    boxes = []
    for s, (ystart, ystop) in scale_dim.items():
        found = find_cars(img, ystart, ystop, s,
                          svc=clf, X_scaler=X_scaler, color_space=COLORSPACE,
                          orient=ORIENTATION, pix_per_cell=PIX_PER_CELL,
                          cell_per_block=CELL_PER_BLOCK)
        if len(found) > 0:
            boxes.append(found)

    if boxes:
        for b in np.concatenate(boxes):
            x, y, size = int(b[0]), int(b[1]), int(b[2])
            # Fall back to blue for window sizes without a COLORS entry
            color = COLORS.get(size, (0, 0, 255))
            cv2.rectangle(img_out, (x, y), (x + size, y + size), color, 2)
            heatmap[y:y + size, x:x + size] += 1

    # Exponential smoothing: halve the old heat, then add the new one.
    # Halving directly equals the former "*5 // 10" but cannot overflow the
    # 8-bit map. It runs for every frame, also those without hits, so that
    # old heat fades instead of being frozen.
    heatmap_smooth //= 2
    heatmap_smooth += heatmap

    # Threshold a copy so that the smoothed map itself keeps weak heat
    heatmap_thresh = apply_threshold(np.copy(heatmap_smooth), 2)
    region_ids, num_regions = label(heatmap_thresh)

    cars = []
    for car_id in range(1, num_regions + 1):
        ys, xs = (region_ids == car_id).nonzero()
        if len(ys) == 0:
            continue
        x_min, x_max = int(xs.min()), int(xs.max())
        y_min, y_max = int(ys.min()), int(ys.max())
        # Stored as center point plus extent
        cars.append(((x_max + x_min) // 2, (y_max + y_min) // 2,
                     x_max - x_min, y_max - y_min))
    return cars, heatmap_smooth, img_out
    

In [None]:
# Outline color per sliding-window size in pixels (the 64 px base window
# multiplied by the search scale)
COLORS = {
    64: (0,0,255),
    96: (0,255,255),
    128: (0,255,0),
    160: (255,255,0),
    192: (255,0,0),
}

# Alternative input: a frame dumped from the project video
#img = mpimg.imread("tmp/project_frame_734.jpg")
img = mpimg.imread("test_images/test4.jpg")

# Start from an empty smoothed heat map and an untouched copy for drawing
heatmap_smooth = np.zeros_like(img[:,:,0])
img_draw = np.copy(img)

# Set to True to inspect the search windows of a single scale instead.
# NOTE: that branch defines neither `cars` nor `heatmap`.
DEBUG_SINGLE_SCALE = False

if not DEBUG_SINGLE_SCALE:
    t0 = time.time()
    cars, heatmap, img_out = process_img(img)
    print("Processing took {:.3f} seconds".format(time.time()-t0))
    cars.sort()
    # Each car is (center_x, center_y, width, height)
    for center_x, center_y, width, height in cars:
        half_w, half_h = width//2, height//2
        top_left = (center_x - half_w, center_y - half_h)
        bottom_right = (center_x + half_w, center_y + half_h)
        cv2.rectangle(img_draw, top_left, bottom_right, (0,0,255), 6)
else:
    found, img_out = find_cars(img, 400, 492, 1.0,
                               svc=clf, X_scaler=X_scaler, color_space=COLORSPACE,
                               orient=ORIENTATION, pix_per_cell=PIX_PER_CELL,
                               cell_per_block=CELL_PER_BLOCK, show_boxes=True)

fig, (boxes_ax, heat_ax, cars_ax) = plt.subplots(3, 1, figsize=(60,20))
boxes_ax.imshow(img_out)
boxes_ax.set_title("Boxes")
heat_ax.imshow(heatmap, cmap="gray")
heat_ax.set_title("Heatmap after applying threshold")
cars_ax.imshow(img_draw)
cars_ax.set_title("Found cars")

In [None]:
# Select the video: "project" or "test"
video_name = "project"

# Template for the file names of dumped frames: (video name, frame number)
frame_name = "tmp/{:s}_frame_{:d}.jpg"
frames_to_save = [16, 24, 613, 734, 1046, 1103]

# Number of previous frames a detection is compared with
NUM_PREV_CARS = 10

def process_video_frame(img):
    """Annotate one video frame with the cars that were detected reliably.

    A car found by ``process_img`` is drawn only if a car with a similar
    x position was found in at least ``NUM_PREV_CARS - 2`` of the remembered
    previous frames. State is kept in the function attributes ``frame_no``
    and ``prev_cars``, which must be initialised before the first call.

    Args:
        img: RGB video frame; it is not modified.

    Returns:
        Annotated copy of ``img``.
    """
    process_video_frame.frame_no += 1
    frame_no = process_video_frame.frame_no
    history = process_video_frame.prev_cars

    # Dump selected raw frames to disk for later inspection
    if frame_no in frames_to_save:
        mpimg.imsave(frame_name.format(video_name, frame_no), img)

    if len(history) > NUM_PREV_CARS:
        del history[0]

    cars, _, _ = process_img(img)
    cars.sort()

    # Draw on a copy: the incoming array belongs to the video reader and
    # must not be altered behind its back.
    annotated = np.copy(img)

    for c in cars:
        c_count = 0
        for i, prev_cars in enumerate(history):
            # The tolerance shrinks from the oldest (index 0) to the newest
            # remembered frame
            max_dist = (NUM_PREV_CARS - i) * 20
            if any(pc[0] - max_dist < c[0] < pc[0] + max_dist for pc in prev_cars):
                c_count += 1
        if c_count >= NUM_PREV_CARS - 2:
            # c is (center_x, center_y, width, height)
            xleft = c[0] - c[2]//2
            xright = c[0] + c[2]//2
            ytop = c[1] - c[3]//2
            ybottom = c[1] + c[3]//2
            cv2.rectangle(annotated, (xleft, ytop), (xright, ybottom), (0,0,255), 6)

    history.append(cars)

    cv2.putText(annotated, "Frame: {:d}".format(frame_no),
                (50,50), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255))
    return annotated

# Start the video with an empty smoothed heat map.
# NOTE(review): the shape is taken from the previously loaded `img` - assumes
# the video frames have the same size; confirm.
heatmap_smooth = np.zeros_like(img[:,:,0])

# Initialise the state that process_video_frame() keeps in its attributes
process_video_frame.frame_no = 0
process_video_frame.prev_cars = []
# Input "<video_name>_video.mp4" is read from the working directory, the
# annotated result is written below ./output_images/
video_out_name = "./output_images/{:s}_video.mp4".format(video_name)
video_in = VideoFileClip("{:s}_video.mp4".format(video_name))
# Every frame is passed through process_video_frame() when the clip is written
video_out = video_in.fl_image(process_video_frame)
# %time is an IPython magic that reports how long the statement took
%time video_out.write_videofile(video_out_name, audio=False)
