In [None]:
import numpy as np
import cv2
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from skimage.feature import hog
from scipy.ndimage.measurements import label
import glob
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from moviepy.editor import VideoFileClip

%matplotlib inline
from IPython.display import HTML

In [None]:
class DataStore():
    """Reads the vehicle and non-vehicle training images into memory.

    Attributes:
        car_fnames: paths matched by 'images/vehicles/*/*.png'.
        scn_fnames: paths matched by 'images/non-vehicles/*/*.png'.
        car_images: one array per entry of car_fnames, read with mpimg.imread.
        scn_images: one array per entry of scn_fnames, read with mpimg.imread.
    """

    def __init__(self):
        """Glob both image folders, load every file and report the elapsed time."""
        print('Loading data...')
        t1 = time.time()

        self.car_fnames = glob.glob('images/vehicles/*/*.png')
        self.scn_fnames = glob.glob('images/non-vehicles/*/*.png')
        self.car_images = [mpimg.imread(fname) for fname in self.car_fnames]
        self.scn_images = [mpimg.imread(fname) for fname in self.scn_fnames]

        t2 = time.time()
        # The precision belongs inside round(); previously it was passed to
        # format() as an unused extra argument and the time was rounded to
        # a whole number of seconds.
        print('Loaded all data in {} seconds'.format(round(t2-t1, 2)))

        
# Load every training image up front; this instance is handed to VehicleDetection further down.
ds = DataStore()

In [None]:
class VehicleDetection():

    
    def __init__(self, data_store):
        """Configure the detector, extract training features and fit the classifier.

        Args:
            data_store: object exposing `car_images` and `scn_images`, two
                sequences of RGB images (positive and negative examples).

        Side effects:
            Prints progress/timing messages and stores the fitted scaler
            (`self.scaler`), classifier (`self.svc`), the scaled feature matrix
            (`self.X`, `self.y`) and the train/test split on the instance.
        """
        self.color_space = 'HLS'

        # Which feature families go into the feature vector.
        self.feat_spatial = False
        self.feat_hist = False
        self.feat_hog = True
        # NOTE(review): get_color_hist looks up self.bins_range, which is never
        # assigned in this constructor - set it before enabling feat_hist.

        # Spatial-binning parameters.
        self.spatial_size = (32, 32)

        # Colour-histogram parameters.
        self.hist_bins = 32
        self.hist_channels = [0, 1, 2]

        # HOG parameters.
        self.orient = 9
        self.pix_per_cell = 8
        self.cell_per_block = 2
        self.hog_channels = [0, 1, 2]

        # Search grids used by multiplex_windows (sliding-window pipeline).
        self.window_config = [
            {
                'y_start_stop': [400, None],
                'xy_window': (80, 80),
                'xy_overlap': (0.8, 0.8)
            },
            {
                'y_start_stop': [400, None],
                'xy_window': (120, 120),
                'xy_overlap': (0.8, 0.8)
            },
            {
                'y_start_stop': [400, None],
                'xy_window': (160, 160),
                'xy_overlap': (0.8, 0.8)
            },
            {
                'y_start_stop': [400, None],
                # NOTE(review): 24x240 breaks the square 80/120/160 progression;
                # possibly a typo for (240, 240) - confirm before changing.
                'xy_window': (24, 240),
                'xy_overlap': (0.8, 0.8)
            }
        ]

        # Search bands/scales used by process_image3 (HOG sub-sampling pipeline).
        self.find_box_config = [
            {
                'ystart': 400,
                'ystop': 500,
                'scale': 1
            },
            {
                'ystart': 400,
                'ystop': 600,
                'scale': 1.5
            },
            {
                'ystart': 400,
                'ystop': 720,
                'scale': 2
            }
        ]

        t1 = time.time()
        print('Extracting features...')

        self.car_images = [self.convert_color(img) for img in data_store.car_images]
        self.scn_images = [self.convert_color(img) for img in data_store.scn_images]

        car_features = self.extract_features_for_imgs(self.car_images)
        scn_features = self.extract_features_for_imgs(self.scn_images)

        # Fit the scaler on the stacked car + non-car features, then scale them.
        unscaled_X = np.vstack((car_features, scn_features)).astype(np.float64)
        self.scaler = StandardScaler().fit(unscaled_X)

        self.X = self.scaler.transform(unscaled_X)
        # Labels: 1 for car rows, 0 for non-car rows, in the same order as X.
        self.y = np.hstack((np.ones(len(car_features)), np.zeros(len(scn_features))))

        t2 = time.time()
        # The precision belongs inside round(); it used to be passed to format()
        # as an unused extra argument, so whole seconds were printed.
        print('Extracted all features in {} seconds'.format(round(t2-t1, 2)))

        t3 = time.time()
        print('Training SVM...')

        # The split seed is drawn at random, so the reported accuracy varies
        # slightly from run to run.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=np.random.randint(0, 100))
        self.svc = LinearSVC()
        self.svc.fit(self.X_train, self.y_train)
        score = self.svc.score(self.X_test, self.y_test)

        t4 = time.time()
        print('Trained SVM in {} seconds with {} accuracy'.format(round(t4-t3, 2), score))

        
    def draw_boxes(self, img, boxes, color=(255,0,0), thick=6):
        imcopy = np.copy(img)
        for box in boxes:
            cv2.rectangle(imcopy, box[0], box[1], color, thick)
        return imcopy
    
        
    def convert_color(self, img):
        if self.color_space == 'HSV':
            return cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
        elif self.color_space == 'LUV':
            return cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
        elif self.color_space == 'HLS':
            return cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
        elif self.color_space == 'YUV':
            return cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
        elif self.color_space == 'YCrCb':
            return cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
        else:
            return np.copy(img)
        
    
    def get_bin_spatial(self, img):
        return cv2.resize(img, self.spatial_size).ravel()
        
        
    def get_color_hist(self, img):
        return np.concatenate([np.histogram(img[:,:,ch], bins=self.hist_bins, range=self.bins_range)[0] for ch in self.hist_channels])
        
    
    def get_hog(self, img, feature_vector=True):
        return hog(img, orientations=self.orient,
                   pixels_per_cell=(self.pix_per_cell, self.pix_per_cell),
                   cells_per_block=(self.cell_per_block, self.cell_per_block), 
                   transform_sqrt=True,
                   visualise=False,
                   feature_vector=feature_vector)
    
    
    def extract_features_for_img(self, img):
        features = []
        if self.feat_spatial:
            features.append(self.get_bin_spatial(img))
        if self.feat_hist:
            features.append(self.get_color_hist(img))
        if self.feat_hog:
            hog_features = []
            for ch in self.hog_channels:
                hog_features.extend(self.get_hog(img[:,:,ch]))
            features.append(hog_features)
        return np.concatenate(features)
    
    
    def extract_features_for_imgs(self, imgs):
        return [self.extract_features_for_img(img) for img in imgs]
    
    
    def slide_window(self, img, x_start_stop=[None, None], y_start_stop=[None, None], 
                     xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
        # If x and/or y start/stop positions not defined, set to image size
        if x_start_stop[0] == None:
            x_start_stop[0] = 0
        if x_start_stop[1] == None:
            x_start_stop[1] = img.shape[1]
        if y_start_stop[0] == None:
            y_start_stop[0] = 0
        if y_start_stop[1] == None:
            y_start_stop[1] = img.shape[0]
        # Compute the span of the region to be searched    
        xspan = x_start_stop[1] - x_start_stop[0]
        yspan = y_start_stop[1] - y_start_stop[0]
        # Compute the number of pixels per step in x/y
        nx_pix_per_step = np.int(xy_window[0]*(1 - xy_overlap[0]))
        ny_pix_per_step = np.int(xy_window[1]*(1 - xy_overlap[1]))
        # Compute the number of windows in x/y
        nx_buffer = np.int(xy_window[0]*(xy_overlap[0]))
        ny_buffer = np.int(xy_window[1]*(xy_overlap[1]))
        nx_windows = np.int((xspan-nx_buffer)/nx_pix_per_step) 
        ny_windows = np.int((yspan-ny_buffer)/ny_pix_per_step) 
        # Initialize a list to append window positions to
        window_list = []
        # Loop through finding x and y window positions
        # Note: you could vectorize this step, but in practice
        # you'll be considering windows one by one with your
        # classifier, so looping makes sense
        for ys in range(ny_windows):
            for xs in range(nx_windows):
                # Calculate window position
                startx = xs*nx_pix_per_step + x_start_stop[0]
                endx = startx + xy_window[0]
                starty = ys*ny_pix_per_step + y_start_stop[0]
                endy = starty + xy_window[1]
                
                # Append window position to list
                window_list.append(((startx, starty), (endx, endy)))
        # Return the list of windows
        return window_list

    
    def search_windows(self, img, windows):
        hot_windows = []
        for window in windows:
            # Extract sub-image using window and resize to 64x64
            sub_img = cv2.resize(img[window[0][1]:window[1][1], window[0][0]:window[1][0]], (64, 64))
            # Extract feature from sub-image
            features = self.extract_features_for_img(sub_img)
            # Scale extracted features to be fed to classifier
            features = self.scaler.transform(np.array(features).reshape(1, -1))
            # Predict if sub-image is a car using classifier
            prediction = self.svc.predict(features)
            if prediction == 1:
                hot_windows.append(window)
        return hot_windows
    
    
    def multiplex_windows(self, img):
        windows = []
        for cfg in self.window_config:
            windows += self.slide_window(img,
                                         y_start_stop=cfg['y_start_stop'],
                                         xy_window=cfg['xy_window'],
                                         xy_overlap=cfg['xy_overlap'])
        return windows

    
    def process_image(self, img):
        draw_img = np.copy(img)
        img = self.convert_color(img)
        windows = self.multiplex_windows(img)
        hot_windows = self.search_windows(img, windows)
        draw_img = self.draw_boxes(draw_img, hot_windows)
        return draw_img

    
    def process_image2(self, img, threshold=1):
        
        # Copy image and convert color space
        draw_img = np.copy(img)
        img = self.convert_color(img)
        
        # Search for hot boxes
        windows = self.multiplex_windows(img)
        boxes = self.search_windows(img, windows)

        # Convert hot boxes to heatmap
        heatmap = np.zeros_like(img[:,:,0]).astype(np.float)
        for box in boxes:
            heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1
        heatmap[heatmap <= threshold] = 0
        heatmap = np.clip(heatmap, 0, 255)
        
        # Draw thresholded boxes on original image
        labels = label(heatmap)
        for car_number in range(1, labels[1]+1):
            # Find pixels with each car_number label value
            nonzero = (labels[0] == car_number).nonzero()
            # Identify x and y values of those pixels
            nonzeroy = np.array(nonzero[0])
            nonzerox = np.array(nonzero[1])
            # Define a bounding box based on min/max x and y
            bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
            # Draw the box on the image
            cv2.rectangle(draw_img, bbox[0], bbox[1], (0,0,255), 6)
        
        return draw_img

    
    def find_boxes(self, img, ystart, ystop, scale, cells_per_step=2):        

        img_tosearch = img[ystart:ystop,:,:]
        ctrans_tosearch = self.convert_color(img_tosearch)

        if scale != 1:
            imshape = ctrans_tosearch.shape
            ctrans_tosearch = cv2.resize(ctrans_tosearch, (np.int(imshape[1]/scale), np.int(imshape[0]/scale)))
            
        ch0 = ctrans_tosearch[:,:,0]
        ch1 = ctrans_tosearch[:,:,1]
        ch2 = ctrans_tosearch[:,:,2]
    
        # Define blocks and steps as above
        nxblocks = (ch0.shape[1] // self.pix_per_cell) - 1
        nyblocks = (ch0.shape[0] // self.pix_per_cell) - 1 
        nfeat_per_block = self.orient * self.cell_per_block ** 2
        # 64 was the orginal sampling rate, with 8 cells and 8 pix per cell
        window = 64
        nblocks_per_window = (window // self.pix_per_cell)-1 
        
        # Instead of overlap, use how many cells to step
        nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
        nysteps = (nyblocks - nblocks_per_window) // cells_per_step
        
        # Compute individual channel HOG features for the entire image
        hog0 = self.get_hog(ch0, feature_vector=False)
        hog1 = self.get_hog(ch1, feature_vector=False)
        hog2 = self.get_hog(ch2, feature_vector=False)
        
        hot_boxes = []
        
        for xb in range(nxsteps):
            for yb in range(nysteps):
                                
                ypos = yb * cells_per_step
                xpos = xb * cells_per_step
                # Extract HOG for this patch
                hog_feat0 = hog0[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
                hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
                hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
                hog_features = np.hstack((hog_feat0, hog_feat1, hog_feat2))
        
                xleft = xpos * self.pix_per_cell
                ytop = ypos * self.pix_per_cell
    
                # Extract the image patch
                subimg = cv2.resize(ctrans_tosearch[ytop:ytop+window, xleft:xleft+window], (64,64))
                
                # Get color features
                # spatial_features = self.get_bin_spatial(subimg)
                # hist_features = self.get_color_hist(subimg)
    
                # Scale features and make a prediction
                test_features = self.scaler.transform(hog_features)    
                #test_features = X_scaler.transform(np.hstack((shape_feat, hist_feat)).reshape(1, -1))
                test_prediction = self.svc.predict(test_features)

                if test_prediction == 1:
                    xbox_left = np.int(xleft*scale)
                    ytop_draw = np.int(ytop*scale)
                    win_draw = np.int(window*scale)
                    box = ((xbox_left, ytop_draw+ystart), (xbox_left+win_draw,ytop_draw+win_draw+ystart))
                    hot_boxes.append(box)
                    
        return hot_boxes
    
    
    def process_image3(self, img, threshold=2):
        draw_img = np.copy(img)
        
        boxes = []
        for cfg in self.find_box_config:
            boxes += self.find_boxes(img, cfg['ystart'], cfg['ystop'], cfg['scale']) 

        heatmap = np.zeros_like(img[:,:,0]).astype(np.float)
        for box in boxes:
            heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1
        heatmap[heatmap <= threshold] = 0
        heatmap = np.clip(heatmap, 0, 255)
        
        # Draw thresholded boxes on original image
        labels = label(heatmap)
        for car_number in range(1, labels[1]+1):
            # Find pixels with each car_number label value
            nonzero = (labels[0] == car_number).nonzero()
            # Identify x and y values of those pixels
            nonzeroy = np.array(nonzero[0])
            nonzerox = np.array(nonzero[1])
            # Define a bounding box based on min/max x and y
            bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
            # Draw the box on the image
            cv2.rectangle(draw_img, bbox[0], bbox[1], (0,0,255), 6)
        
        return draw_img
        
    
    
# Constructing the detector runs feature extraction and SVM training inside __init__.
vd = VehicleDetection(ds)

In [None]:
# Run the HOG sub-sampling pipeline (process_image3) on one still image and display the result.
# Alternative: sliding-window + heat-map pipeline with threshold 2:
#img = vd.process_image2(mpimg.imread('test_images/test5.jpg'), 2)
img = mpimg.imread('test_images/test4.jpg')
img = vd.process_image3(img)
plt.imshow(img)

In [None]:
# Pass the project video through process_image3 via fl_image and write the
# result to test_output.mp4 without audio; %time reports how long the write takes.
clip = VideoFileClip('project_video.mp4')
new_clip = clip.fl_image(vd.process_image3)
%time new_clip.write_videofile('test_output.mp4', audio=False)

In [None]:
# Embed the rendered output video (test_output.mp4) in the notebook as an HTML5 player.
HTML("""
<video width="640" height="480" controls>
  <source src="test_output.mp4" type="video/mp4">
</video>
""")