In [1]:
### Import the necessary libraries and packages
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import time
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from scipy.ndimage.measurements import label
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from scipy import ndimage
import pandas as pd
import tensorflow as tf
import scipy.misc
import random
from collections import defaultdict
from collections import Counter


In [2]:
# udacity_df = pd.read_csv('../data/labels.csv')
# udacity_cars_df = udacity_df.loc[(udacity_df.label == 'car') & (udacity_df.occluded == 0)]
# udacity_cars = udacity_cars_df.as_matrix()
# extract_images(udacity_cars, '../data/Udacity_dataset/', '../data/udacity_cropped/')

# udacity_car_files = glob.glob('../data/udacity_cropped/*jpg')
# plot_images(udacity_car_files[-200:-190], 5, 2)


In [3]:
def plot_images(files, rows, cols):
    """Display image files on a ``rows`` x ``cols`` grid of subplots.

    Parameters
    ----------
    files : sequence of str
        Paths of the images to show; each is read with ``matplotlib.image.imread``.
    rows, cols : int
        Number of subplot rows and columns.

    Notes
    -----
    Images fill the grid in row-major order. If fewer files than grid cells
    are supplied, the remaining cells are left empty instead of raising
    ``IndexError``; surplus files are ignored.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes without .ravel().
    fig, axes = plt.subplots(rows, cols, squeeze=False)
    plt.tight_layout()
    for ax, file in zip(axes.ravel(), files):
        ax.imshow(mpimg.imread(file))
        

In [4]:
# Define a function to extract the target objects from the larger Udacity image and store them to a folder
def extract_images(data, data_path, save_path):
    """Crop the annotated object out of each source image and save it as a 64x64 PNG.

    Parameters
    ----------
    data : iterable of row-like
        Each row is indexed as ``row[0]`` = file name (relative to
        ``data_path``) and ``row[1:5]`` = ``xmin, ymin, xmax, ymax`` of the
        bounding box (both corners inclusive).
    data_path : str
        Prefix that is concatenated directly with the file name.
    save_path : str
        Prefix that is concatenated directly with the output file name
        ``image_<NNNNN>.png``, where ``NNNNN`` is the 1-based row number.
    """
    for index, row in enumerate(data, start=1):
        image = mpimg.imread(data_path + row[0])
        # Cast to int so the coordinates are always valid slice bounds, even
        # when the rows come from an array holding floats or numpy scalars.
        xmin, ymin, xmax, ymax = (int(value) for value in row[1:5])
        # Isolate the region in the image that contains the object
        crop = image[ymin:ymax + 1, xmin:xmax + 1]
        crop = cv2.resize(crop, (64, 64))
        # zfill pads to at least five digits and, unlike slicing a fixed
        # '00000' filler, stays correct once the index exceeds 99999.
        mpimg.imsave(save_path + 'image_' + str(index).zfill(5) + '.png', crop)
        

In [5]:
def convert_color(img, conv=None):
    """Return a colour-converted copy of ``img``.

    Parameters
    ----------
    img : array-like
        Source image; it is never modified.
    conv : str or None
        Suffix of an OpenCV ``cv2.COLOR_*`` constant, e.g. ``'RGB2YCrCb'``.
        ``'RGB'`` or ``None`` means "no conversion" and returns a plain copy.

    Returns
    -------
    numpy.ndarray
        The converted image, or an unconverted copy.

    Raises
    ------
    AttributeError
        If ``cv2`` has no constant named ``COLOR_<conv>``.
    """
    image = np.copy(img)
    if conv is None or conv == 'RGB':
        return image
    # Look the constant up by name instead of eval()-ing a generated
    # expression: same result for valid names, no arbitrary code execution.
    return cv2.cvtColor(image, getattr(cv2, 'COLOR_' + conv))
    
# Define a function to compute binned color features  
def bin_spatial(img, size=(32, 32), channel='all'):
    """Compute binned (down-sampled) colour features.

    Parameters
    ----------
    img : numpy.ndarray
        Image indexed as ``img[:, :, ch]``.
    size : tuple of int
        Target size passed to ``cv2.resize`` for every channel.
    channel : 'all' or iterable of int
        Channel indices to use; ``'all'`` selects channels 0, 1 and 2.

    Returns
    -------
    numpy.ndarray
        The resized channels, each flattened, concatenated in channel order.
    """
    # Only compare against 'all' when a string was passed: comparing an
    # array of indices with a string is ambiguous on recent NumPy versions.
    if isinstance(channel, str) and channel == 'all':
        channel = range(3)

    resized_channels = [cv2.resize(img[:, :, ch], size).ravel() for ch in channel]
    return np.concatenate(resized_channels)

# Define a function to compute color histogram features  
def color_hist(img, nbins=32, bins_range=(0, 256), channel='all'):
    """Compute per-channel colour histogram features.

    Parameters
    ----------
    img : numpy.ndarray
        Image indexed as ``img[:, :, ch]``.
    nbins : int
        Number of histogram bins per channel.
    bins_range : tuple
        ``(low, high)`` range passed to ``np.histogram``.
    channel : 'all' or iterable of int
        Channel indices to use; ``'all'`` selects channels 0, 1 and 2.

    Returns
    -------
    numpy.ndarray
        The bin counts of every selected channel, concatenated in channel
        order (length ``nbins * number_of_channels``).
    """
    # Only compare against 'all' when a string was passed: comparing an
    # array of indices with a string is ambiguous on recent NumPy versions.
    if isinstance(channel, str) and channel == 'all':
        channel = range(3)

    # np.histogram returns (counts, bin_edges); only the counts are features.
    counts_per_channel = [np.histogram(img[:, :, ch], bins=nbins, range=bins_range)[0]
                          for ch in channel]
    return np.concatenate(counts_per_channel)

# Define a function to return HOG features and visualization
def get_hog_features(img, orient, pix_per_cell, cell_per_block, 
                        vis=False, feature_vec=True, trans_sqrt=True, block_norm='L1'):
    """Compute HOG features for a single-channel image with ``skimage.feature.hog``.

    Parameters
    ----------
    img : numpy.ndarray
        Image channel handed to ``hog``.
    orient : int
        Number of orientation bins.
    pix_per_cell : int
        Cell edge length in pixels (square cells).
    cell_per_block : int
        Block edge length in cells (square blocks).
    vis : bool
        When true, ``hog`` is asked for a visualisation image as well.
    feature_vec : bool
        Forwarded as ``feature_vector``.
    trans_sqrt : bool
        Forwarded as ``transform_sqrt``.
    block_norm : str
        Block normalisation method forwarded to ``hog``.

    Returns
    -------
    features, or (features, hog_image) when ``vis`` is true
        Exactly what ``hog`` returns for the given flags.
    """
    import inspect  # local import: only needed to probe the installed hog() signature

    # scikit-image renamed the `visualise` keyword to `visualize`; newer
    # releases reject the old spelling and older ones do not know the new
    # one. Pick whichever the installed version accepts.
    accepted = inspect.signature(hog).parameters
    vis_keyword = 'visualize' if 'visualize' in accepted else 'visualise'

    hog_kwargs = {
        'orientations': orient,
        'pixels_per_cell': (pix_per_cell, pix_per_cell),
        'cells_per_block': (cell_per_block, cell_per_block),
        'block_norm': block_norm,
        'transform_sqrt': trans_sqrt,
        'feature_vector': feature_vec,
        vis_keyword: vis,
    }
    # hog() itself returns one value or a (features, image) pair depending on
    # the visualisation flag, so its result can be passed straight through.
    return hog(img, **hog_kwargs)
    

In [6]:
# Define a function to extract features from a list of images.
# Depending on which settings dicts are given, it calls convert_color(), bin_spatial(), color_hist() and get_hog_features().
def extract_features(data, img_format='png', spatial_dict=None, hist_dict=None, hog_dict=None):
    """Build one feature vector per image file.

    Every image is read, rescaled to the 0-1 range when it is a JPEG, resized
    to 64x64 and then described by up to three feature groups, concatenated in
    this order: spatial bins, colour histogram, HOG.

    Parameters
    ----------
    data : iterable of str
        Image file paths.
    img_format : str
        ``'jpg'`` forces the division by 255 for every file; otherwise only
        files whose name ends in ``.jpg``/``.jpeg`` (any case) are rescaled.
    spatial_dict : dict or None
        Keys ``'conv'``, ``'size'``, ``'channels'``; ``None`` skips the group.
    hist_dict : dict or None
        Keys ``'conv'``, ``'nbins'``, ``'channels'``; ``None`` skips the group.
    hog_dict : dict or None
        Keys ``'conv'``, ``'orient'``, ``'pix_per_cell'``, ``'cell_per_block'``,
        ``'channels'``, ``'trans_sqrt'``, ``'block_norm'``; ``None`` skips the group.

    Returns
    -------
    list of numpy.ndarray
        One 1-D feature vector per input file, in input order.

    Raises
    ------
    ValueError
        If all three settings dicts are ``None`` (there would be nothing to
        concatenate).
    """
    if spatial_dict is None and hist_dict is None and hog_dict is None:
        raise ValueError('At least one of spatial_dict, hist_dict or hog_dict must be provided')

    # Create a list to append feature vectors to
    features = []

    # Iterate through the list of images
    for file in data:
        single_img_features = []
        # Read in each one by one
        image = mpimg.imread(file)
        if img_format == 'jpg' or str(file).lower().endswith(('.jpg', '.jpeg')):
            image = image.astype(np.float32) / 255
        image = cv2.resize(image, (64, 64))

        if spatial_dict is not None:
            spatial_size = spatial_dict['size']
            image_conv = convert_color(image, spatial_dict['conv'])
            single_img_features.append(
                bin_spatial(image_conv, size=(spatial_size, spatial_size),
                            channel=spatial_dict['channels']))

        if hist_dict is not None:
            image_conv = convert_color(image, hist_dict['conv'])
            # NOTE(review): hist_dict['bin_range'] is not applied; the range
            # stays hard-coded to (0, 256) exactly as before, because
            # find_cars() uses the same constant and both must agree. With
            # images scaled to 0-1 this range looks too wide - confirm and
            # change both places together.
            single_img_features.append(
                color_hist(image_conv, nbins=hist_dict['nbins'], bins_range=(0, 256),
                           channel=hist_dict['channels']))

        if hog_dict is not None:
            hog_channels = hog_dict['channels']
            # Compare with 'all' only for strings; an index array compared
            # with a string is ambiguous on recent NumPy versions.
            if isinstance(hog_channels, str) and hog_channels == 'all':
                hog_channels = range(3)
            image_conv = convert_color(image, hog_dict['conv'])
            per_channel_hog = [
                get_hog_features(image_conv[:, :, ch], hog_dict['orient'],
                                 hog_dict['pix_per_cell'], hog_dict['cell_per_block'],
                                 vis=False, feature_vec=True,
                                 trans_sqrt=hog_dict['trans_sqrt'],
                                 block_norm=hog_dict['block_norm'])
                for ch in hog_channels]
            single_img_features.append(np.ravel(per_channel_hog))

        features.append(np.concatenate(single_img_features))

    # Return list of feature vectors
    return features


In [7]:
import pickle

# Save a dictionary into a pickle file
def save_to_pickle(data, key_name, file_name):
    """Pickle ``data`` under the given keys into one or several ``.p`` files.

    Parameters
    ----------
    data : sequence
        Objects to store.
    key_name : sequence of str
        Dictionary key for each object in ``data`` (paired positionally).
    file_name : list of str, or str
        Output path(s) without the ``.p`` extension. With more than one
        path, each ``{key: object}`` pair goes to its own file. With a single
        path (a one-element list or a plain string), all pairs are stored
        together as one dictionary.

    Returns
    -------
    None
    """
    # A bare string would otherwise be treated as a sequence of one-character
    # file names by len()/zip() below.
    if isinstance(file_name, str):
        file_name = [file_name]

    if len(file_name) > 1:
        for d, k, f in zip(data, key_name, file_name):
            # `with` guarantees the handle is flushed and closed.
            with open(f + '.p', "wb") as handle:
                pickle.dump({k: d}, handle)
    else:
        pickle_data = dict(zip(key_name, data))
        # Report only the keys: the values can be whole feature matrices.
        print('Pickling keys {} to {}'.format(list(pickle_data), file_name[0] + '.p'))
        with open(file_name[0] + '.p', "wb") as handle:
            pickle.dump(pickle_data, handle)

    return None
        

In [8]:
def augment_data(image_files, filepath, rotate=False, angle=25, flip=[], adj_bright=False):
    """Write augmented copies (rotated, flipped, brightness-adjusted) of images.

    Parameters
    ----------
    image_files : iterable of str
        Paths of the source images.
    filepath : str
        Prefix that is concatenated directly with every output file name.
    rotate : bool
        Save a copy rotated by a random angle in ``[-angle, angle]`` degrees.
    angle : number
        Maximum absolute rotation angle.
    flip : iterable of int
        OpenCV flip codes to apply; each must be 0, 1 or -1.
    adj_bright : bool
        Save a copy whose HSV value channel is scaled by a random factor in
        ``[0.25, 1.15]``.

    Raises
    ------
    ValueError
        If ``flip`` contains a code other than 0, 1 or -1.
    IOError
        If an image file cannot be read.

    Notes
    -----
    Output names are ``<prefix><NNNNN>.png`` with the 1-based position of the
    source file. The flip prefixes are kept exactly as they were because
    ``make_training_files`` globs on them. Per the OpenCV documentation, flip
    code 0 flips around the x-axis (top-to-bottom), 1 around the y-axis
    (left-to-right) and -1 around both.
    """
    flip_prefixes = {0: 'horz_flip_image_', 1: 'vert_flip_image_', -1: 'both_flip_image_'}
    # Validate before touching any file so a bad code cannot leave a
    # half-written augmentation set behind.
    for fi in flip:
        if fi not in flip_prefixes:
            raise ValueError('flip codes must be 0, 1 or -1, got {!r}'.format(fi))

    for index, file in enumerate(image_files, start=1):
        # scipy.ndimage.imread no longer exists in current SciPy releases;
        # read with OpenCV (BGR order) and convert to RGB instead.
        bgr_image = cv2.imread(file)
        if bgr_image is None:
            raise IOError('Could not read image file: {}'.format(file))
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

        # zfill stays correct beyond 99999 files, unlike slicing a '00000' filler.
        suffix = str(index).zfill(5) + '.png'

        if rotate:
            ang = random.uniform(-angle, angle)
            rotated_img = ndimage.rotate(image, ang, reshape=False)
            mpimg.imsave(filepath + 'rotated_image_' + suffix, rotated_img)

        for fi in flip:
            mpimg.imsave(filepath + flip_prefixes[fi] + suffix, cv2.flip(image, fi))

        if adj_bright:
            random_bright = min(.25 + np.random.uniform(), 1.15)
            hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
            # Scale in float and clip before storing: factors above 1 would
            # otherwise overflow the 8-bit channel and turn highlights dark.
            scaled_value = hsv_image[:, :, 2].astype(np.float32) * random_bright
            hsv_image[:, :, 2] = np.clip(scaled_value, 0, 255).astype(hsv_image.dtype)
            bright_adj_image = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)
            mpimg.imsave(filepath + 'bright_adj_image_' + suffix, bright_adj_image)
            

In [9]:
def _stratified_shuffle(files, labels):
    """Return ``files`` reordered by a deterministic, label-stratified shuffle."""
    # train_test_split is used purely as a shuffler here: both halves are
    # glued back together, so no sample is dropped.
    first_part, second_part, _, _ = train_test_split(files, labels, stratify=labels,
                                                     random_state=42)
    return first_part + second_part


def make_training_files(GTI=False, KITTI=False, udacity=False, augmented=False, rotated=None,
                        flipped=[], bright_adj=None, len_non_cars=None, debug=False):
    """Collect, shuffle and label the image files used for training.

    Parameters
    ----------
    GTI, KITTI, udacity : bool
        Which car image collections to include.
    augmented : False or container of str
        Falsy disables augmented data. Otherwise it is tested with ``in`` for
        the names ``'GTI'``, ``'KITTI'``, ``'udacity'`` and ``'non-cars'`` to
        decide which augmented collections to include, so pass a list/tuple/
        set of those names (a bare ``True`` is not iterable and fails).
    rotated, bright_adj : bool or None
        Whether rotated / brightness-adjusted augmentations are included.
        Both must be given (not ``None``) when ``augmented`` is truthy.
    flipped : list of int
        Flip codes (0, 1, -1) whose augmentations are included.
    len_non_cars : number or None
        ``None`` keeps the non-car list as is. A value above 1 repeats the
        list so it ends up ``len_non_cars`` times its size (must be an int);
        otherwise it is the fraction of the list to keep.
    debug : bool
        Print GTI subgroup sizes and also return the augmented file lists.

    Returns
    -------
    (files, labels) or (aug_lists, files, labels) when ``debug`` is true
        ``files`` is an array of paths with all non-car files first, then all
        car files; ``labels`` holds 0.0 for non-car and 1.0 for car.
        ``aug_lists`` is ``(aug_gti_car_files, aug_kitti_car_files,
        aug_gti_non_car_files, aug_extras_non_car_files)``.

    Raises
    ------
    Exception
        If ``augmented`` is truthy but ``rotated``/``bright_adj`` is ``None``
        or ``flipped`` is not a list.

    Notes
    -----
    The stratified shuffles of the augmented sets build their labels from the
    sizes of the *original* collections, i.e. they assume every active
    augmentation produced exactly one file per original image.
    """
    gti_car_far = []
    gti_car_left = []
    gti_car_midclose = []
    gti_car_right = []
    gti_car_files = []
    kitti_car_files = []
    udacity_car_files = []
    # Lists for augmented image files
    aug_gti_car_files = []
    aug_kitti_car_files = []
    aug_udacity_car_files = []
    aug_gti_non_car_files = []
    aug_extras_non_car_files = []

    if augmented:
        has_rotated = rotated is not None
        flipped_is_list = (type(flipped) == list)
        has_bright_adj = bright_adj is not None
        if not (has_rotated and flipped_is_list and has_bright_adj):
            raise Exception('If augmented is set to True, then you must provide True or False values for '
                            'rotated and bright_adj, and you must provide a list with values of 0, 1, and/or -1 '
                            'to flipped')

    if GTI:
        # Collect the filenames for all car images in the dataset
        gti_car_far = glob.glob('../data/vehicles/GTI_Far/*png')
        gti_car_left = glob.glob('../data/vehicles/GTI_Left/*png')
        gti_car_midclose = glob.glob('../data/vehicles/GTI_MiddleClose/*png')
        gti_car_right = glob.glob('../data/vehicles/GTI_Right/*png')
        gti_car_files = gti_car_far + gti_car_left + gti_car_midclose + gti_car_right

    if KITTI:
        kitti_car_files = glob.glob('../data/vehicles/KITTI_extracted/*png')

    if udacity:
        udacity_car_files = glob.glob('../data/udacity_cropped/*jpg')

    # Collect the filenames for all non-car images in the dataset
    gti_non_car_files = glob.glob('../data/non_vehicles/GTI/*png')
    extras_non_car_files = glob.glob('../data/non_vehicles/Extras/*png')

    # Generate augmented data if desired
    if augmented:
        # Check the augmentation operations the user selected
        ops = [rotated, bright_adj, 0 in flipped, 1 in flipped, -1 in flipped]
        # Assign the correct image suffix for corresponding augmentation op
        suffixes = ['rotated', 'bright_adj', 'horz_flip', 'vert_flip', 'both_flip']
        for suffix, op in zip(suffixes, ops):
            if not op:
                continue
            # Collect image filenames for augmented car datasets
            if 'GTI' in augmented:
                for subgroup in ('GTI_Far', 'GTI_Left', 'GTI_MiddleClose', 'GTI_Right'):
                    aug_gti_car_files += glob.glob('../data/augmented_images/vehicles/' + subgroup + '/' +
                                                   suffix + '*png')
            if 'KITTI' in augmented:
                aug_kitti_car_files += glob.glob('../data/augmented_images/vehicles/KITTI_extracted/' +
                                                 suffix + '*png')
            if 'udacity' in augmented:
                # Filter on the operation suffix like every other collection;
                # a bare '*png' added the whole folder once per active op.
                aug_udacity_car_files += glob.glob('../data/augmented_images/vehicles/udacity_cropped/' +
                                                   suffix + '*png')

            if 'non-cars' in augmented:
                # Collect image filenames for augmented non-car datasets
                aug_gti_non_car_files += glob.glob('../data/augmented_images/non_vehicles/GTI/' + suffix + '*png')
                aug_extras_non_car_files += glob.glob('../data/augmented_images/non_vehicles/Extras/' +
                                                      suffix + '*png')

    ## Shuffle the data based on collection type (e.g. GTI, KITTI, etc.), subgroup (e.g. far, left, etc.), and
    ## if applicable, augmented operation (e.g. rotated, flipped, etc.)
    # For car dataset
    if GTI:
        gti_car_subgroups = ([1] * len(gti_car_far) + [2] * len(gti_car_left) + [3] * len(gti_car_midclose) +
                             [4] * len(gti_car_right))
        gti_car_files = _stratified_shuffle(gti_car_files, gti_car_subgroups)

    # For the augmented car dataset, for each collection, shuffle the images within the collection
    # based on the augmentation operation
    if augmented:
        n_active_ops = sum(ops)
        if 'GTI' in augmented:
            gti_aug_op_labels = []
            collection_lengths = [len(gti_car_far), len(gti_car_left), len(gti_car_midclose), len(gti_car_right)]
            ii = 1
            # One distinct label per (operation, subgroup) pair, in the same
            # order in which the augmented files were globbed above.
            for _ in range(n_active_ops):
                for length in collection_lengths:
                    gti_aug_op_labels += [ii] * length
                    ii += 1
            aug_gti_car_files = _stratified_shuffle(aug_gti_car_files, gti_aug_op_labels)

        if 'KITTI' in augmented:
            kitti_aug_op_labels = []
            for ii in range(1, n_active_ops + 1):
                kitti_aug_op_labels += [ii] * len(kitti_car_files)
            aug_kitti_car_files = _stratified_shuffle(aug_kitti_car_files, kitti_aug_op_labels)

        if 'udacity' in augmented:
            udacity_aug_op_labels = []
            for ii in range(1, n_active_ops + 1):
                udacity_aug_op_labels += [ii] * len(udacity_car_files)
            aug_udacity_car_files = _stratified_shuffle(aug_udacity_car_files, udacity_aug_op_labels)

    # Combine all the car and augmented car files and shuffle by collection(e.g. GTI, KITTI, etc.)
    car_files = (gti_car_files + aug_gti_car_files + kitti_car_files + aug_kitti_car_files +
                 udacity_car_files + aug_udacity_car_files)
    car_labels = ([1] * len(gti_car_files) + [2] * len(aug_gti_car_files) + [3] * len(kitti_car_files) +
                  [4] * len(aug_kitti_car_files) + [5] * len(udacity_car_files) +
                  [6] * len(aug_udacity_car_files))
    car_files = _stratified_shuffle(car_files, car_labels)

    # Now to shuffle the non-car datasets
    non_car_labels = [1] * len(gti_non_car_files) + [2] * len(extras_non_car_files)
    if augmented:
        if 'non-cars' in augmented:
            ii = 3
            collection_lengths = [len(gti_non_car_files), len(extras_non_car_files)]
            for length in collection_lengths:
                for _ in range(sum(ops)):
                    non_car_labels += [ii] * length
                    ii += 1

    non_car_files = gti_non_car_files + extras_non_car_files + aug_gti_non_car_files + aug_extras_non_car_files
    non_car_files = _stratified_shuffle(non_car_files, non_car_labels)
    if len_non_cars is not None:
        if len_non_cars > 1:
            # Append shuffled copies of the ORIGINAL list. Extending with a
            # shuffle of the growing list doubled it on every pass (x4 for 3,
            # x8 for 4, ...) instead of adding one copy per pass.
            base_non_car_files = list(non_car_files)
            for _ in range(len_non_cars - 1):
                non_car_files += shuffle(base_non_car_files)
        else:
            non_car_files = non_car_files[:int(len_non_cars * len(non_car_files))]

    all_img_files = np.hstack((np.array(non_car_files), np.array(car_files)))
    img_type_labels = np.hstack((np.zeros(len(non_car_files)), np.ones(len(car_files))))

    print('No. of GTI car files: {}'.format(len(gti_car_files)))
    print('No. of KITTI car files: {}'.format(len(kitti_car_files)))
    print('No. of Udacity car files: {}'.format(len(udacity_car_files)))

    print('\nNo. of Augmented GTI car files: {}'.format(len(aug_gti_car_files)))
    print('No. of Augmented KITTI car files: {}'.format(len(aug_kitti_car_files)))
    print('No. of Augmented Udacity car files: {}'.format(len(aug_udacity_car_files)))

    print('\nNo. of GTI non-car files: {}'.format(len(gti_non_car_files)))
    print('No. of Extras non-car files: {}'.format(len(extras_non_car_files)))

    print('\nNo. of Augmented GTI non-car files: {}'.format(len(aug_gti_non_car_files)))
    print('No. of Augmented Extras non-car files: {}'.format(len(aug_extras_non_car_files)))

    print('Total number of car files: {}'.format(len(car_files)))
    print('Total number of non-car files: {}'.format(len(non_car_files)))
    print('Total number of images: {}'.format(all_img_files.shape))
    print('Total number of labels: {}'.format(img_type_labels.shape))

    train_img_files = np.copy(all_img_files)
    train_labels_copy = np.copy(img_type_labels)

    if debug:
        print('\nNo. of GTI Far files: {}'.format(len(gti_car_far)), 
              '\nNo. of GTI Left files: {}'.format(len(gti_car_left)),
              '\nNo. of GTI MidClose files: {}'.format(len(gti_car_midclose)),
              '\nNo. of GTI Right files: {}'.format(len(gti_car_right)))

        return ((aug_gti_car_files, aug_kitti_car_files, aug_gti_non_car_files, aug_extras_non_car_files), 
                train_img_files, train_labels_copy)
    else:
        return train_img_files, train_labels_copy


In [10]:
def make_svc(file_end, settings, dicts):
    """Train a LinearSVC on the configured image set and pickle every artefact.

    Parameters
    ----------
    file_end : str
        Suffix appended to the three pickle base names under ``../pickle/``.
    settings : dict
        Dataset options with keys ``'GTI'``, ``'KITTI'``, ``'udacity'``,
        ``'aug'``, ``'rotated'``, ``'flipped'``, ``'bright_adj'`` and
        ``'len_non_cars'``; they are forwarded to ``make_training_files``.
    dicts : dict
        Feature settings with keys ``'spatial'``, ``'hist'`` and ``'hog'``.

    Three pickles are written: the scaled features with labels and scaler,
    the feature settings, and the trained classifier together with the HOG
    ``trans_sqrt`` / ``block_norm`` options. Nothing is returned.
    """
    train_img_files, train_labels_copy = make_training_files(
        GTI=settings['GTI'],
        KITTI=settings['KITTI'],
        udacity=settings['udacity'],
        augmented=settings['aug'],
        rotated=settings['rotated'],
        flipped=settings['flipped'],
        bright_adj=settings['bright_adj'],
        len_non_cars=settings['len_non_cars'])

    spatial_dict, hist_dict, hog_dict = dicts['spatial'], dicts['hist'], dicts['hog']
    raw_features = extract_features(train_img_files, spatial_dict=spatial_dict,
                                    hist_dict=hist_dict, hog_dict=hog_dict)
    X_train_features = np.array(raw_features).astype(np.float32)

    # The per-column scaler is fitted on the complete feature matrix
    X_scaler = StandardScaler().fit(X_train_features)
    scaled_X = X_scaler.transform(X_train_features)
    print('X train features shape is {}'.format(X_train_features.shape))
    print('Scaled X features is {}'.format(scaled_X.shape))

    pickle_dir = '../pickle/'
    save_to_pickle([scaled_X, train_labels_copy, X_scaler],
                   ['features', 'labels', 'scaler'],
                   [pickle_dir + 'scaled_X_' + file_end])
    save_to_pickle([spatial_dict, hist_dict, hog_dict],
                   ['spatial', 'hist', 'hog'],
                   [pickle_dir + 'dict_' + file_end])

    # Stratified 80/20 split with a randomly drawn seed
    split_seed = np.random.randint(0, 100)
    X_train, X_validate, y_train, y_validate = train_test_split(
        scaled_X, train_labels_copy, test_size=0.2,
        stratify=train_labels_copy, random_state=split_seed)
    for caption, array in (('X_train', X_train), ('y_train', y_train),
                           ('X_validate', X_validate), ('y_validate', y_validate)):
        print('{} shape is {}'.format(caption, array.shape))
    print(y_train[:20])

    # Fit the linear SVC and report how long training took
    svc = LinearSVC()
    fit_started = time.time()
    svc.fit(X_train, y_train)
    fit_finished = time.time()
    print(round(fit_finished - fit_started, 2), 'Seconds to train SVC...')
    print('Test Accuracy of SVC = ', round(svc.score(X_validate, y_validate), 4))
    save_to_pickle([svc, hog_dict['trans_sqrt'], hog_dict['block_norm']],
                   ['svc', 'trans_sqrt', 'block_norm'],
                   [pickle_dir + 'svc_pickle_' + file_end])

def load_data(file_end):
    """Load the scaler, feature settings and classifier pickled for ``file_end``.

    Reads ``../pickle/scaled_X_<file_end>.p``, ``../pickle/dict_<file_end>.p``
    and ``../pickle/svc_pickle_<file_end>.p`` (relative to the working
    directory).

    Returns
    -------
    tuple
        ``(X_scaler, spatial_dict, hist_dict, hog_dict, svc)``.

    Notes
    -----
    Unpickling executes code embedded in the file, so only load pickles that
    were produced by a trusted run of this notebook.
    """
    def _read_pickle(stem):
        # `with` closes the handle even if unpickling fails.
        with open("../pickle/" + stem + file_end + ".p", "rb") as handle:
            return pickle.load(handle)

    X_scaler = _read_pickle("scaled_X_")['scaler']

    feature_settings = _read_pickle("dict_")
    spatial_dict = feature_settings['spatial']
    hist_dict = feature_settings['hist']
    hog_dict = feature_settings['hog']

    svc = _read_pickle("svc_pickle_")['svc']

    return X_scaler, spatial_dict, hist_dict, hog_dict, svc


In [11]:
# Define a single function that can extract features using hog sub-sampling 
# and make predictions
def find_cars(img, img_format, xstart, xstop, ystart, ystop, scale, X_scaler, svc, hog_dict, 
              spatial_dict=None, hist_dict=None, print_pred=False):
    """Slide a 64x64 window over an image region and classify every window.

    HOG is computed once per channel for the whole (rescaled) region and then
    sub-sampled per window. Spatial and histogram features are computed per
    window. The features are stacked in the order spatial, histogram, HOG
    before scaling and prediction.

    Parameters
    ----------
    img : numpy.ndarray
        Full frame, indexed as ``img[y, x, channel]``.
    img_format : str
        ``'jpg'`` divides the pixel values by 255 before processing.
    xstart, xstop, ystart, ystop : int
        Bounds of the region to search.
    scale : number
        The region is shrunk by this factor, so a 64-pixel window covers
        ``64 * scale`` pixels of the original frame.
    X_scaler : object with ``transform``
        Feature scaler applied to each window's feature vector.
    svc : object with ``predict``
        Classifier; a prediction equal to 1 marks a detection.
    hog_dict : dict
        Keys ``'conv'``, ``'orient'``, ``'pix_per_cell'``, ``'cell_per_block'``,
        ``'channels'``, ``'trans_sqrt'``, ``'block_norm'``.
    spatial_dict, hist_dict : dict or None
        Same layout as in ``extract_features``; ``None`` skips the group.
    print_pred : bool
        Print every window's prediction.

    Returns
    -------
    draw_img : numpy.ndarray
        Copy of ``img`` with a rectangle drawn for every detection.
    bboxes : list
        ``((x1, y1), (x2, y2))`` frame coordinates of every detection.
    """
    # Extract the Hog parameters
    hg_cspace = hog_dict['conv']
    orient = hog_dict['orient']
    pix_per_cell = hog_dict['pix_per_cell']
    cell_per_block = hog_dict['cell_per_block']
    hg_ch = hog_dict['channels']
    trans_sqrt = hog_dict['trans_sqrt']
    block_norm = hog_dict['block_norm']

    # Compare with 'all' only for strings; an index array compared with a
    # string is ambiguous on recent NumPy versions.
    if isinstance(hg_ch, str) and hg_ch == 'all':
        hg_ch = np.arange(3)

    draw_img = np.copy(img)
    if img_format == 'jpg':
        img = img.astype(np.float32) / 255

    img_tosearch = img[ystart:ystop, xstart:xstop, :]
    bboxes = [] # Store the coordinates of the bounded boxes

    def _search_region(cspace):
        # Crop -> colour-convert -> rescale, identical for every colour space.
        region = convert_color(img_tosearch, conv=cspace)
        if scale != 1:
            height, width = region.shape[:2]
            # Built-in int replaces np.int, which was only an alias for it and
            # no longer exists in current NumPy releases.
            region = cv2.resize(region, (int(width / scale), int(height / scale)))
        return region

    ctrans_tosearch = _search_region(hg_cspace)

    # extract_features() converts the image separately for each feature
    # group, so the window patches must come from a region in that group's
    # own colour space. Regions are cached so each conversion happens once;
    # when a group shares the HOG colour space the HOG region is reused.
    regions = {hg_cspace: ctrans_tosearch}

    def _region_for(feature_dict):
        cspace = feature_dict['conv']
        if cspace not in regions:
            regions[cspace] = _search_region(cspace)
        return regions[cspace]

    spatial_region = _region_for(spatial_dict) if spatial_dict is not None else None
    hist_region = _region_for(hist_dict) if hist_dict is not None else None

    chs = [ctrans_tosearch[:, :, ch] for ch in hg_ch]

    # Define blocks and steps
    nxblocks = (chs[0].shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (chs[0].shape[0] // pix_per_cell) - cell_per_block + 1

    # 64 was the original sampling rate, with 8 cells and 8 pix per cell
    window = 64
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step + 1
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step + 1

    # Compute individual channel HOG features for the entire region
    hogs = [get_hog_features(ch, orient, pix_per_cell, cell_per_block,
                             feature_vec=False, trans_sqrt=trans_sqrt, block_norm=block_norm)
            for ch in chs]

    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            xpos = xb*cells_per_step

            # Extract HOG for this patch
            hog_features = np.hstack([hg[ypos:ypos+nblocks_per_window,
                                         xpos:xpos+nblocks_per_window].ravel()
                                      for hg in hogs])

            xleft = xpos*pix_per_cell
            ytop = ypos*pix_per_cell

            # Get color features
            spatial_features = np.array([])
            hist_features = np.array([])
            if spatial_dict is not None:
                spatial_size = spatial_dict['size']
                subimg = cv2.resize(spatial_region[ytop:ytop+window, xleft:xleft+window], (64,64))
                spatial_features = bin_spatial(subimg, size=(spatial_size, spatial_size),
                                               channel=spatial_dict['channels'])
            if hist_dict is not None:
                subimg = cv2.resize(hist_region[ytop:ytop+window, xleft:xleft+window], (64,64))
                # NOTE(review): hist_dict['bin_range'] is not applied; the
                # range stays (0, 256) to match extract_features(), which
                # hard-codes the same value. Change both together.
                hist_features = color_hist(subimg, nbins=hist_dict['nbins'], bins_range=(0, 256),
                                           channel=hist_dict['channels'])

            # Scale features and make a prediction
            features = np.hstack((spatial_features, hist_features, hog_features))
            test_features = X_scaler.transform(features.reshape(1, -1))
            test_prediction = svc.predict(test_features)
            if print_pred:
                print('prediction: {}'.format(test_prediction))
            if test_prediction == 1:
                # Map the window back to original frame coordinates
                xbox_left = int(xleft*scale)
                ytop_draw = int(ytop*scale)
                win_draw = int(window*scale)
                top_left = (xbox_left + xstart, ytop_draw + ystart)
                bottom_right = (xbox_left + xstart + win_draw, ytop_draw + win_draw + ystart)
                cv2.rectangle(draw_img, top_left, bottom_right, (0,0,255), 6)
                bboxes.append((top_left, bottom_right))

    return draw_img, bboxes


In [12]:
def add_heat(heatmap, bbox_list):
    """Increment ``heatmap`` by one inside every bounding box.

    Each box has the form ``((x1, y1), (x2, y2))``; the affected rows are
    ``y1:y2`` and the affected columns ``x1:x2``. The array is modified in
    place and also returned.
    """
    for box in bbox_list:
        top_left, bottom_right = box[0], box[1]
        row_span = slice(top_left[1], bottom_right[1])
        col_span = slice(top_left[0], bottom_right[0])
        heatmap[row_span, col_span] += 1

    return heatmap

def apply_threshold(heatmap, threshold):
    """Return a copy of ``heatmap`` in which values below ``threshold`` are zero.

    The input array is left untouched.
    """
    thresholded = np.array(heatmap, order='K', copy=True)
    too_cold = thresholded < threshold
    thresholded[too_cold] = 0

    return thresholded


In [13]:
def make_labeled_bboxes(image, heat, threshold, draw, color=(0, 0, 255), thickness=3):
    """Turn a heat map into one bounding box per connected hot region.

    The heat map is thresholded with ``apply_threshold`` and its connected
    regions are numbered with ``label``. For every region the box
    ``((min_x, min_y), (max_x, max_y))`` of its pixels is recorded.

    Parameters
    ----------
    image : numpy.ndarray
        Image the boxes are drawn on (in place) when ``draw`` is true.
    heat : numpy.ndarray
        Heat map to analyse.
    threshold : number
        Heat values below this are ignored.
    draw : bool
        Whether to draw the boxes on ``image``.
    color, thickness
        Rectangle colour and line thickness used when drawing.

    Returns
    -------
    dict, or (image, dict) when ``draw`` is true
        The dict maps the region number (starting at 1) to its box.
    """
    labeled_map, region_count = label(apply_threshold(heat, threshold))

    out_bboxes = {}
    for car_number in range(1, region_count + 1):
        # Coordinates of every pixel that belongs to this region
        hits = np.nonzero(labeled_map == car_number)
        ys, xs = np.array(hits[0]), np.array(hits[1])
        corners = ((xs.min(), ys.min()), (xs.max(), ys.max()))
        out_bboxes[car_number] = corners
        if draw:
            cv2.rectangle(image, corners[0], corners[1], color, thickness)

    return (image, out_bboxes) if draw else out_bboxes


In [14]:
from moviepy.editor import VideoFileClip
from IPython.display import HTML


In [15]:
# Define a class to keep track of data among frames
class Frame():
    """Mutable container for the state that is shared between video frames."""

    def __init__(self, img_shape):
        """Initialise all counters, thresholds and heat maps.

        ``img_shape`` is the shape used for the three zero-filled heat maps.
        """
        # Current frame and global flags
        self.image = None
        self.frame_count = 0
        self.car_accum = False
        self.state = 'detect'
        self.show_bboxes = False
        self.debug = False

        # Detection phase
        self.detect_count = 0      # number of frames that have passed
        self.detect_samples = 10   # number of frames to collect boxes for
        self.detect_thresh = 9
        self.accum_thresh = 2

        # Tracking phase
        self.track_count = 0
        self.track_samples = 2
        self.track_thresh = 2
        self.track_length = 1

        # Low-confidence detections and overlap bookkeeping
        self.low_conf_detections = {}
        self.low_conf_thresh = 2
        self.overlap_ratio = {}
        self.ratio_text = 'Not enough overlap in cars: '

        # Three independent heat maps, each with its own history list
        self.detect_heat, self.track_heat, self.accum_heat = (
            np.zeros(img_shape) for _ in range(3))
        self.detect_heat_record, self.track_heat_record, self.accum_heat_record = (
            [] for _ in range(3))

        # Boxes per car; unknown keys start out as empty lists
        self.car_boxes = defaultdict(list)
        self.final_boxes = defaultdict(list)

    def show_bbox_details(self, thickness, colors, box_lists):
        """Draw each list of boxes on ``self.image`` with its own colour and thickness.

        The three arguments are iterated in parallel; every box is indexed as
        ``box[0]`` and ``box[1]`` for its two corners.
        """
        for line_width, color, boxes in zip(thickness, colors, box_lists):
            for corners in boxes:
                cv2.rectangle(self.image, corners[0], corners[1], color, line_width)
            

In [30]:
# This is the function that processes each frame of the video to find cars and apply a box around them
def process_image(image):
    """Find cars in one video frame and return an annotated copy of it.

    The function drives a three-state machine stored in the module-level
    ``frame`` object. The states are tested by consecutive ``if`` statements,
    so one call can fall through from one state into the next.

    * ``'detect'``: search the full frame width over several vertical windows
      and scales, add the hits to a heat map and, every
      ``frame.detect_samples`` calls, threshold that heat map into
      ``frame.final_boxes``. Moves to ``'track'`` when boxes were found.
    * ``'track'``: search only a padded window around each box of
      ``frame.final_boxes`` and collect the hits per car in
      ``frame.car_boxes``. Moves to ``'verify'`` every
      ``frame.track_samples`` calls.
    * ``'verify'``: rebuild one box per car from the collected hits. A car
      without any box above threshold is dropped as a false positive. A car
      whose new box covers less than half of its previous box is counted as
      a low-confidence detection. Returns to ``'detect'`` when no cars are
      left, when ``frame.track_count`` is a multiple of
      ``frame.track_length``, or when a car reached
      ``frame.low_conf_thresh``; otherwise returns to ``'track'``.

    Besides ``frame`` the function reads the globals ``img_format``,
    ``X_scaler``, ``svc``, ``hog_dict``, ``spatial_dict`` and ``hist_dict``
    and calls ``find_cars``, ``add_heat`` and ``make_labeled_bboxes``, all
    defined elsewhere in the notebook.

    Args:
        image: the frame to process; indexed as ``image[:, :, 0]`` after
            copying, so it needs three dimensions.

    Returns:
        ``frame.image``: the copy of ``image`` with the final boxes (and,
        depending on ``frame.debug`` / ``frame.show_bboxes``, extra text and
        boxes) drawn on it.
    """
    frame.image = np.copy(image)
    track_boxes = []
    test_boxes = []
    detect_boxes = []
    accum_detect_boxes = []
    search_box = []
    reset_text = ''
    font = cv2.FONT_HERSHEY_SIMPLEX
    if frame.debug:
        cv2.putText(frame.image, 'start state: {}'.format(frame.state), (650, 50), font, 1,
                    (200,255,155), 2, cv2.LINE_AA)
        cv2.putText(frame.image, 'frame count: {}'.format(frame.frame_count), (650, 150), font, 1,
                    (200,255,155), 2, cv2.LINE_AA)

    if frame.state == 'detect':
        # Search the full width of the frame
        xstart = 0
        xstop = image.shape[1]
        # NOTE(review): the second window's ystop of 6000 looks like a typo for
        # 600. It is kept unchanged because altering it would change the
        # detections - confirm how find_cars treats a ystop past the image.
        ycoords = [(400, 550), (400, 6000), (400, 656)]
        # Previously tried scale sets: [1, 1.5, 1.7, 2], [0.8, 1, 1.5], [0.5, 1, 1.5]
        scales = [1, 1.25, 1.5]
        if len(ycoords) != len(scales):
            raise Exception('The number of image detection windows must equal the number of detection scales')
        for (ystart, ystop), scale in zip(ycoords, scales):
            _, bboxes = find_cars(image, img_format, xstart, xstop, ystart, ystop, scale, X_scaler,
                                  svc, hog_dict, spatial_dict, hist_dict)
            detect_boxes += bboxes

        if frame.car_accum:
            # Threshold this frame's hits on their own, then accumulate only
            # the resulting labelled boxes across frames
            heat = np.zeros_like(frame.image[:, :, 0])
            heat = add_heat(heat, detect_boxes)
            car_boxes = make_labeled_bboxes(frame.image, heat, threshold=frame.accum_thresh, draw=False)
            accum_detect_boxes += list(car_boxes.values())
            frame.accum_heat = add_heat(frame.accum_heat, accum_detect_boxes)
        else:
            # Accumulate the raw hits across frames
            frame.detect_heat = add_heat(frame.detect_heat, detect_boxes)

        # Store a copy of the current heat maps for debugging.
        # The index of the heat map being appended equals the current detect_count.
        frame.detect_heat_record.append(np.copy(frame.detect_heat))
        frame.accum_heat_record.append(np.copy(frame.accum_heat))

        if frame.debug:
            cv2.putText(frame.image, 'detect count: ' + str(frame.detect_count), (650, 200),
                        font, 1, (200,255,155), 2, cv2.LINE_AA)

        frame.detect_count += 1
        if (frame.detect_count % frame.detect_samples) == 0:
            # Enough frames sampled: turn the accumulated heat into boxes
            heat_source = frame.accum_heat if frame.car_accum else frame.detect_heat
            frame.final_boxes = make_labeled_bboxes(frame.image, heat_source,
                                                    threshold=frame.detect_thresh, draw=False)
            # Reset the stored heat maps for the next sampling round
            frame.detect_heat = np.zeros_like(frame.image[:, :, 0])
            frame.accum_heat = np.zeros_like(frame.image[:, :, 0])
            if len(frame.final_boxes) > 0:
                frame.state = 'track'
                reset_text = 'go track'
            else:
                frame.state = 'detect'
                reset_text = 'reset'

    if frame.state == 'track':
        # Padding (in pixels) added around each known box to form the search window
        adj_x_l, adj_x_r = 15, 15
        adj_y_b, adj_y_t = 10, 5
        for car, box in frame.final_boxes.items():
            ystart = np.clip(box[0][1] - adj_y_t, 0, frame.image.shape[0])
            ystop = np.clip(box[1][1] + adj_y_b, 0, frame.image.shape[0])
            xstart = np.clip(box[0][0] - adj_x_l, 0, frame.image.shape[1])
            xstop = np.clip(box[1][0] + adj_x_r, 0, frame.image.shape[1])
            search_box += [((xstart, ystart), (xstop, ystop))]
            single_car_boxes = []
            scales = [0.5, 0.8, 1.1]
            for scale in scales:
                _, bboxes = find_cars(image, img_format, xstart, xstop, ystart, ystop, scale, X_scaler,
                                      svc, hog_dict, spatial_dict, hist_dict)
                single_car_boxes += bboxes

            track_boxes += single_car_boxes
            frame.car_boxes[car] += single_car_boxes
            frame.track_heat = add_heat(frame.track_heat, single_car_boxes)

        # Store a copy of the current heat map for debugging.
        # The index of the heat map being appended equals the current track_count.
        frame.track_heat_record.append(np.copy(frame.track_heat))
        if frame.debug:
            cv2.putText(frame.image, 'track count: ' + str(frame.track_count), (650, 250),
                        font, 1, (200,255,155), 2, cv2.LINE_AA)

        frame.track_count += 1
        if (frame.track_count % frame.track_samples) == 0:
            reset_text = 'go verify'
            frame.state = 'verify'
            frame.track_heat = np.zeros_like(frame.image[:, :, 0])

    if frame.state == 'verify':
        false_detections = []  # Detections that are false positives
        too_low_conf = False
        for car, boxes in frame.car_boxes.items():
            heat = np.zeros_like(frame.image[:, :, 0])
            heat = add_heat(heat, boxes)
            test_car_boxes = make_labeled_bboxes(frame.image, heat,
                                                 threshold=frame.track_thresh, draw=False)
            if test_car_boxes:
                org_box = frame.final_boxes[car]
                # Merge every labelled region of this car into one enclosing box
                parts = list(test_car_boxes.values())
                test_box = ((min(part[0][0] for part in parts), min(part[0][1] for part in parts)),
                            (max(part[1][0] for part in parts), max(part[1][1] for part in parts)))
                test_boxes += [test_box]
                # The boxes intersect only when min R edge > max L edge and
                # min bottom edge > max top edge
                min_right_edge = min(test_box[1][0], org_box[1][0])
                max_left_edge = max(test_box[0][0], org_box[0][0])
                min_bottom_edge = min(test_box[1][1], org_box[1][1])
                max_top_edge = max(test_box[0][1], org_box[0][1])
                # Clamp both extents at zero: otherwise two negative extents
                # (boxes disjoint in x and y) multiply into a positive "area"
                overlap_w = max(0, min_right_edge - max_left_edge)
                overlap_h = max(0, min_bottom_edge - max_top_edge)
                overlap_area = overlap_w * overlap_h
                org_area = (org_box[1][0] - org_box[0][0]) * (org_box[1][1] - org_box[0][1])
                # Built-in float replaces the np.float alias removed in NumPy 1.24;
                # a degenerate (zero-area) original box counts as no overlap
                overlap_ratio = overlap_area / float(org_area) if org_area > 0 else 0.0
                frame.overlap_ratio[car] = overlap_ratio
                if overlap_w > 0 and overlap_h > 0 and overlap_ratio >= 0.5:
                    frame.final_boxes[car] = test_box
                    if car in frame.low_conf_detections:
                        frame.low_conf_detections[car] = 0
                else:
                    frame.low_conf_detections[car] = frame.low_conf_detections.get(car, 0) + 1
            else:
                false_detections += [car]

        # The collected per-car hits are consumed; start fresh for the next round
        frame.car_boxes = defaultdict(list)
        if frame.low_conf_detections:
            text = 'Not enough overlap in cars: '
            for ii, car in enumerate(frame.low_conf_detections):
                if frame.low_conf_detections[car] > 0:
                    text += (str(car) + '->' + '%.2f' % frame.overlap_ratio[car])
                    if ii < (len(frame.low_conf_detections) - 1):
                        text += ', '

                if frame.low_conf_detections[car] >= frame.low_conf_thresh:
                    too_low_conf = True
                    break

            if frame.debug:
                cv2.putText(frame.image, text, (650, 300),
                            font, 1, (200,255,155), 2, cv2.LINE_AA)

        if false_detections:
            text = 'False positives in detections: '
            for ii, car in enumerate(false_detections):
                frame.final_boxes.pop(car, None)
                text += str(car)
                if ii < (len(false_detections) - 1):
                    text += ', '

            if frame.debug:
                cv2.putText(frame.image, text, (650, 350), font, 1,
                            (200,255,155), 2, cv2.LINE_AA)

        if (len(frame.final_boxes) == 0
                or (frame.track_count % frame.track_length) == 0
                or too_low_conf):
            reset_text = 'back to detect'
            frame.state = 'detect'
            frame.low_conf_detections = {}
        else:
            frame.state = 'track'

    # Draw the current best box of every car
    for box in frame.final_boxes.values():
        cv2.rectangle(frame.image, box[0], box[1], (0, 0, 255), 6)

    if frame.show_bboxes:
        if frame.car_accum:
            frame.show_bbox_details(thickness=[3, 2, 4, 3, 2],
                                    colors=[(255, 255, 0), (100, 100, 0), (0, 0, 0), (0, 255, 255),
                                            (100, 0, 100)],
                                    box_lists=[accum_detect_boxes, detect_boxes, search_box,
                                               test_boxes, track_boxes])
        else:
            frame.show_bbox_details(thickness=[2, 4, 3, 2],
                                    colors=[(100, 100, 0), (0, 0, 0), (0, 255, 255), (100, 0, 100)],
                                    box_lists=[detect_boxes, search_box, test_boxes, track_boxes])

    if frame.debug:
        cv2.putText(frame.image, 'end state: {}'.format(frame.state), (650, 100), font, 1,
                    (200,255,155), 2, cv2.LINE_AA)
        cv2.putText(frame.image, reset_text, (1000, 50),
                    font, 1, (255, 0, 0), 2, cv2.LINE_AA)

    frame.frame_count += 1

    return frame.image


In [17]:
# When need_svc is True, make_svc is called with the settings below before
# loading; otherwise only load_data(file_end) runs.
need_svc = False
file_end = 'ycrcb'
settings = {
    'GTI': True,
    'KITTI': True,
    'udacity': False,
    'aug': [],
    'rotated': False,
    'flipped': [],
    'bright_adj': False,
    'len_non_cars': None,
}

# Possible color spaces to use:
#   cv2.COLOR_RGB2HSV
#   cv2.COLOR_RGB2LUV
#   cv2.COLOR_RGB2HLS
#   cv2.COLOR_RGB2YUV
#   cv2.COLOR_RGB2YCrCb

spatial_dict = {
    'conv': 'RGB2YCrCb',
    'size': 32,
    'channels': 'all',
}
hist_dict = {
    'conv': 'RGB2YCrCb',
    'nbins': 32,
    'bin_range': (0, 256),
    'channels': 'all',
}
hog_dict = {
    'conv': 'RGB2HSV',
    'orient': 9,
    'pix_per_cell': 8,
    'cell_per_block': 2,
    'channels': 'all',
    'trans_sqrt': True,
    'block_norm': 'L1',
}
dicts = {'spatial': spatial_dict, 'hist': hist_dict, 'hog': hog_dict}

if need_svc:
    make_svc(file_end, settings, dicts)

# load_data rebinds the three feature dicts, so the literals above are only
# what reaches make_svc. The recorded output of this cell shows a loaded
# hog_dict with 'conv': 'RGB2YCrCb', not the 'RGB2HSV' written above.
X_scaler, spatial_dict, hist_dict, hog_dict, svc = load_data(file_end)
print(hog_dict)

{'trans_sqrt': True, 'block_norm': 'L1', 'conv': 'RGB2YCrCb', 'channels': 'all', 'cell_per_block': 2, 'orient': 9, 'pix_per_cell': 8}


In [None]:
img_format = 'jpg'
img = mpimg.imread('../test_images/test1.jpg')
frame = Frame(img.shape[:2])
frame.show_bboxes = False
frame.debug = False
frame.car_accum = True
if frame.car_accum:
    frame.accum_thresh = 1
frame.detect_samples = 3
frame.detect_thresh = 3
frame.track_samples = 3       
frame.track_thresh = 3
frame.track_length = 6

if (frame.track_length % frame.track_samples) != 0:
    raise Exception('Please make track_length a multiple of track_samples')
    
video_name = 'project'
output_name = 'debug_' + video_name
ii = 10
clip1 = VideoFileClip("../" + video_name + "_video.mp4", audio=False)
road_clip = clip1.fl_image(process_image)
%time road_clip.write_videofile("../" + output_name + "_output_" + str(ii) + ".mp4", audio=False, verbose=0)


 12%|█▏        | 148/1261 [04:50<37:17,  2.01s/it]

In [None]:
import PyQt5
%matplotlib qt5


In [None]:
def plot_heat_maps(heat_record, rows, cols, dev, title):
    """Show consecutive recorded heat maps in a ``rows`` x ``cols`` grid.

    Args:
        heat_record: sequence of heat maps; each shown item is passed to
            ``imshow`` with the ``'hot'`` colour map.
        rows: number of subplot rows.
        cols: number of subplot columns.
        dev: index offset of the first heat map to show; subplot ``i``
            displays ``heat_record[i + dev]``.
        title: figure-level title.

    Subplots whose index lies past the end of ``heat_record`` are left blank
    instead of raising ``IndexError``.
    """
    # squeeze=False always yields a 2-D array of axes, so a 1x1 grid
    # (which otherwise returns a bare Axes without .ravel()) works too
    fig, axes = plt.subplots(rows, cols, squeeze=False)
    fig.suptitle(title, fontsize=10)
    for i, ax in enumerate(axes.ravel()):
        idx = i + dev
        if idx >= len(heat_record):
            # Not enough recorded frames to fill the grid: hide the spare axes
            ax.axis('off')
            continue
        ax.imshow(heat_record[idx], cmap='hot')
        ax.set_title('Frame {}'.format(idx))
    # Lay out after the titles exist so they are taken into account
    fig.tight_layout()

In [None]:
# plot_heat_maps(frame.accum_heat_record, 5, 2, 631, 'Accumulated')
# Show the first ten recorded tracking heat maps in a 5 x 2 grid
plot_heat_maps(frame.track_heat_record, rows=5, cols=2, dev=0, title='track')