In [8]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob as glob
import math
import os
import time
plt.rcParams['figure.figsize'] = (20.0, 10.0)
%matplotlib inline

from utilities import *
from skimage.feature import hog

from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from scipy.ndimage.measurements import label

import tensorflow as tf
from sklearn.utils import shuffle

from alexnet import AlexNet


In [2]:
##
nom_vehicle_filenames = glob.glob('train_images_vehicles/*/*.png')
nom_non_vehicle_filenames = glob.glob('train_images_non_vehicles/*/*.png')
extra_vehicle_filenames = glob.glob('train_extra/vehicles/*.jpg')
extra_non_vehicle_filenames = glob.glob('train_extra/non_vehicles/*.jpg')

vehicle_filenames=nom_vehicle_filenames+extra_vehicle_filenames
non_vehicle_filenames=nom_non_vehicle_filenames+extra_non_vehicle_filenames
##
vehicle_labels = np.ones((len(vehicle_filenames)))
non_vehicle_labels = np.zeros((len(non_vehicle_filenames)))
##
extra_weight = 2
nom_vehicle_weights = np.ones((len(nom_vehicle_filenames)))
nom_non_vehicle_weights = np.ones((len(nom_non_vehicle_filenames)))
extra_vehicle_weights = np.ones((len(extra_vehicle_filenames)))*extra_weight
extra_non_vehicle_weights = np.ones((len(extra_non_vehicle_filenames)))*extra_weight

vehicle_weights = np.concatenate([nom_vehicle_weights, extra_vehicle_weights])
non_vehicle_weights = np.concatenate([nom_non_vehicle_weights, extra_non_vehicle_weights])
#sample_weights = np.concatenate([vehicle_weights, non_vehicle_weights])
##
print("               Samples  Labels  Weights")
print("Vehicles:     ", len(vehicle_filenames), "   ", len(vehicle_labels), "  ", len(vehicle_weights))
print("Non-Vehicles: ", len(non_vehicle_filenames), "   ", len(non_vehicle_labels), "  ", len(non_vehicle_weights))
#print(len(nom_non_vehicle_filenames))
#print(len(extra_vehicle_filenames))
#print(len(extra_non_vehicle_filenames))

               Samples  Labels  Weights
Vehicles:      8827     8827    8827
Non-Vehicles:  9006     9006    9006


In [3]:
vehicle_ims = [mpimg.imread(fname) for fname in nom_vehicle_filenames]
non_vehicle_ims = [mpimg.imread(fname) for fname in nom_non_vehicle_filenames]

In [5]:
X_all = np.array(vehicle_ims+non_vehicle_ims)

vehicle_labels = np.ones((len(vehicle_ims)))
non_vehicle_labels = np.zeros((len(non_vehicle_ims)))
y_all = np.concatenate([vehicle_labels, non_vehicle_labels])

print(X_all.shape, y_all.shape)

(17760, 64, 64, 3) (17760,)


In [None]:
fname = vehicle_filenames[5]
im=mpimg.imread(fname)
showImage(im)

In [None]:
# utility functions

def getHOGFeatures(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
  # img: must be greyscale
  # orient: number of orientation bins
  # 
  if vis == True:
    features, hog_image = hog(img, 
                              orientations=orient, 
                              pixels_per_cell=(pix_per_cell, pix_per_cell),
                              cells_per_block=(cell_per_block, cell_per_block), 
                              transform_sqrt=False,
                              visualise=True, 
                              feature_vector=False,
                              block_norm='L1')
    return features, hog_image
  else:      
    features = hog(img, 
                   orientations=orient, 
                   pixels_per_cell=(pix_per_cell, pix_per_cell),
                   cells_per_block=(cell_per_block, cell_per_block), 
                   transform_sqrt=False, 
                   visualise=False, 
                   feature_vector=feature_vec,
                   block_norm='L1')
    return features
  
def getColorHistogram(imgs, nbins=32, channels=[], bins_range=[0, 256]):
  # we use opencv's histogram calculator bc it is ~40x faster (according to opencv's tutorial)
  # imgs, channels, histSize, ranges must be lists i.e. [...]
  tbr = []
  for c in channels:
    hist = cv2.calcHist(images = imgs,channels=[c],mask=None,histSize=nbins,ranges=bins_range)
    tbr.append(hist)
  return np.reshape(np.concatenate(tbr), (-1))

def customFilter(img):
  fac1 = 0.01 # Cutoff
  fac2 = 0.04 # Steepness
  Z = 255.0 / (1.0+fac1*np.exp(fac2*((255.0-img[:, :, 0]) + (255.0-img[:, :, 1])))) 
  return Z.astype(np.uint8)

def spatialBinning(img, color_space='RGB', size=(32, 32)):
  # img: should be RGB if color_space is not RGB
  # Convert image to new color space if specified
  if color_space != 'RGB':
    if color_space == 'HSV':
      feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    elif color_space == 'LUV':
      feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
    elif color_space == 'HLS':
      feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    elif color_space == 'YUV':
      feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    elif color_space == 'YCrCb':
      feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
    elif color_space == 'GRY':
      feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    elif color_space == 'CST':
      feature_image = customFilter(img)
  else: feature_image = np.copy(img)             
  # Use cv2.resize().ravel() to create the feature vector
  features = cv2.resize(feature_image, size).ravel() 
  # Return the feature vector
  return features

In [None]:
def imagePipeline(img, print_stats=False, scaler=None):
  
  do_hog = True
  do_color_hist = False
  do_spatial = False
  
  features = []
  
  if do_hog == True:
    g_im = greyscale(img)
    num_orient_bins = 9
    pix_per_cell = 6
    cell_per_block = 2
    hog_features = getHOGFeatures(img=g_im, 
                                  orient=num_orient_bins, 
                                  pix_per_cell=pix_per_cell, 
                                  cell_per_block=cell_per_block, 
                                  vis=False, 
                                  feature_vec=True)
    if print_stats == True:
      print('hog: ', hog_features.shape)
      print('  mean: ', np.mean(hog_features))
      print('  std: ', np.std(hog_features))
      print('  min, max: ', np.min(hog_features), np.max(hog_features))
    features.append(hog_features)
    
  if do_color_hist == True:
    nbins = [32]
    channels = [0, 1, 2]
    b_range = [0, 256]
    hist_features = getColorHistogram(imgs=[img], 
                                      nbins=nbins, 
                                      channels=channels, 
                                      bins_range=b_range)
    if print_stats == True:
      print('color histogram: ', hist_features.shape)
      print('  mean: ', np.mean(hist_features))
      print('  std: ', np.std(hist_features))
      print('  min, max: ', np.min(hist_features), np.max(hist_features))
    features.append(hist_features)

  if do_spatial == True:
    new_size = (32, 32)
    c_space = 'YCrCb'
    bin_features = spatialBinning(img=img, 
                                  color_space=c_space,
                                  size=new_size)
    if print_stats == True:
      print('spatial binning (image resizing): ', bin_features.shape)
      print('  mean: ', np.mean(bin_features))
      print('  std: ', np.std(bin_features))
      print('  min, max: ', np.min(bin_features), np.max(bin_features))
    features.append(bin_features)
  
  tbr = np.concatenate(features)
  if scaler is not None:
    tbr = scaler.transform(tbr.reshape(1, -1))
  return tbr

# Generate a random index to look at a car image
ind = np.random.randint(0, len(vehicle_filenames))
# Read in the image
im, im_shape = loadImage(vehicle_filenames[ind], greyscale=False)

print(im_shape)
test_f = imagePipeline(im, print_stats=True)
print(test_f.shape)

In [None]:
import time
from random import randint

def augmentSingleImage(img, num_perturbs=0):
    # Flip all images horizontally
    xtended = np.array([img, img[ :, ::-1, :]])
    # perturb flipped images
    xtended_len = xtended.shape[0]
    for i in range(0, num_perturbs):
        pert_ims = np.array([perturb(img_t) for img_t in xtended[:xtended_len, :, :, :]])
        xtended = np.append(xtended, pert_ims, axis=0)

    return xtended

def extractFeatures(filenames, show_summary=False):
  f_list = []
  for fname in filenames:
    img = mpimg.imread(fname)
    #imgs = augmentSingleImage(img, num_perturbs=1)
    #for i in imgs:
    #  features = imagePipeline(img)
    #  f_list.append(features)
    features = imagePipeline(img)
    f_list.append(features)
    
  return np.array(f_list)

t1 = time.time()
vehicle_features = extractFeatures(vehicle_filenames)
non_vehicle_features = extractFeatures(non_vehicle_filenames)
time_taken = time.time() - t1
print("Time to extract Features: ", time_taken)
print("Vehicles: ", vehicle_features.shape[0])
print("Non-Vehicles: ", non_vehicle_features.shape[0])

In [None]:
def insert_subimage(image, sub_image, y, x): 
    h, w, c = sub_image.shape
    image[y:y+h, x:x+w, :]=sub_image 
    return image

def showSampleImage(im_list):
  im_height, im_width, im_chan = (64, 64, 3)
  num_rows = 10
  num_cols = 10
  results_image = 255.*np.ones(shape=(num_rows*im_height, num_cols*im_width, im_chan),dtype=np.float32)
  sample_ims=10
  for r in range(num_rows):
    for c in range(num_cols):
      im = mpimg.imread(im_list[randint(0, len(im_list))])
      insert_subimage(results_image, im, r*im_height, c*im_width)
  return results_image

f_num = 0
v_sample = showSampleImage(vehicle_filenames)
plt.rcParams["figure.figsize"] = (25,25)
f_num = showImage(v_sample, f_num)
nv_sample = showSampleImage(non_vehicle_filenames)
plt.rcParams["figure.figsize"] = (25,25)
f_num = showImage(nv_sample, f_num)

In [None]:
all_weights = np.concatenate([vehicle_weights, non_vehicle_weights])
all_features = np.concatenate([vehicle_features, non_vehicle_features])
all_labels = np.concatenate([vehicle_labels, non_vehicle_labels])

print(all_labels.shape)
y_data = np.concatenate([all_labels, all_weights])
print(y_data.shape)

X_scaler = StandardScaler().fit(X_data)
scaled_X = X_scaler.transform(X_data)

#X_train, X_val, y_train, y_val = train_test_split(scaled_X, y_data, test_size=0.2, random_state=42)


In [None]:
nb_classes = 2
epochs = 10
batch_size = 128

X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size=0.2, random_state=0)

features = tf.placeholder(tf.float32, (None, 64, 64, 3))
labels = tf.placeholder(tf.int64, None)
resized = tf.image.resize_images(features, (227, 227))

# Returns the second final layer of the AlexNet model,
# this allows us to redo the last layer for the traffic signs
# model.
fc7 = AlexNet(resized, feature_extract=True)
fc7 = tf.stop_gradient(fc7)
shape = (fc7.get_shape().as_list()[-1], nb_classes)
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2))
fc8b = tf.Variable(tf.zeros(nb_classes))
logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b)

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss_op = tf.reduce_mean(cross_entropy)
opt = tf.train.AdamOptimizer()
train_op = opt.minimize(loss_op, var_list=[fc8W, fc8b])
init_op = tf.global_variables_initializer()

preds = tf.arg_max(logits, 1)
accuracy_op = tf.reduce_mean(tf.cast(tf.equal(preds, labels), tf.float32))


def eval_on_data(X, y, sess):
    total_acc = 0
    total_loss = 0
    for offset in range(0, X.shape[0], batch_size):
        end = offset + batch_size
        X_batch = X[offset:end]
        y_batch = y[offset:end]

        loss, acc = sess.run([loss_op, accuracy_op], feed_dict={features: X_batch, labels: y_batch})
        total_loss += (loss * X_batch.shape[0])
        total_acc += (acc * X_batch.shape[0])

    return total_loss/X.shape[0], total_acc/X.shape[0]

with tf.Session() as sess:
  sess.run(init_op)
  for i in range(epochs):
    # training
    print("Epoch", i+1)
    t0 = time.time()
    X_train, y_train = shuffle(X_train, y_train)
    for offset in range(0, X_train.shape[0], batch_size):
      end = offset + batch_size
      sess.run(train_op, feed_dict={features: X_train[offset:end], labels: y_train[offset:end]})
      val_loss, val_acc = eval_on_data(X_val, y_val, sess)
      print("Validation Accuracy: ")
      print(val_acc, sep=' ', end='', flush=True)
    val_loss, val_acc = eval_on_data(X_val, y_val, sess)
    print("  Time: %.3f seconds" % (time.time() - t0))
    print("  Validation Loss =", val_loss)
    print("  Validation Accuracy =", val_acc)
    print("")

Epoch 1
Validation Accuracy: 
0.628941441441

In [None]:
## Use a linear SVC 
clf = 'SVC'
svc = SVC(kernel='linear', probability=False)
# scores = svc.decision_function(X)

## Use a Decision Tree
#clf = 'DT'
#svc = DecisionTreeClassifier(min_samples_split=50)
#scores = svc.predict_proba(X)

# Check the training time for the SVC
t1=time.time()
svc.fit(X_train, y_train, sample_weights)
t2 = time.time()
training_time = round(t2-t1, 4)
print(training_time, 'Seconds to train SVC...')
# Check the score of the SVC
svc_score = round(svc.score(X_test, y_test), 8)
print('Test Accuracy of SVC = ', svc_score)

In [None]:
im_shape = (720, 1280)
near_min_row = im_shape[0]-40
far_min_row = 460

near_max_height = 290
far_max_height = 80

near_width = 300
far_width = 70

near_step_size = 30
far_step_size = 10

sampling_num = 30
base = 5

min_row_sampling=np.logspace(start=math.log(near_min_row, base), stop=math.log(far_min_row, base), num=sampling_num, base=base, dtype=np.uint16)
print(min_row_sampling)
max_height_sampling=np.logspace(start=math.log(near_max_height, base), stop=math.log(far_max_height, base), num=sampling_num, base=base, dtype=np.uint16)
#width_sampling=np.logspace(start=math.log(near_width, base), stop=math.log(far_width, base), num=sampling_num, base=base, dtype=np.uint16)
width_sampling=max_height_sampling
step_size_sampling=np.logspace(start=math.log(near_width, base), stop=math.log(far_width, base), num=sampling_num, base=base, dtype=np.uint16)

In [None]:
def sliding_window_horizontal(image_strip, step_size, window_width):
  # slide a window across the image
  for c in np.arange(start=int(window_width/2), stop=image_strip.shape[1]-int(window_width/2), step=step_size, dtype=np.uint16):
    yield (c, image_strip[:, int(c-window_width/2):c + int(window_width/2)])

def horizontal_strip(image, min_row, height):
  return image[min_row-height:min_row, :]

def findHotWindows(img, min_row_sampling, max_height_sampling, width_sampling, step_size_sampling, feature_scaler, classifier, return_ims=False):
  hot_windows = []
  imgs = []
  sampling_num = min_row_sampling.shape[0]
  for i in range(sampling_num):
    strip = horizontal_strip(img, min_row_sampling[i], max_height_sampling[i])
    for (c, window) in sliding_window_horizontal(strip, step_size_sampling[i], width_sampling[i]):
      resized_window = cv2.resize(window, (64, 64))
      features = imagePipeline(resized_window, scaler=feature_scaler)
      #scaled_features = feature_scaler.transform(features.reshape(1,-1))
      #predict_probs = svc.decision_function(features)
      #prediction = (predict_probs-prob_threshold) >= 0
      prediction = svc.predict(features)
      if prediction == True:
        hot_windows.append((c, min_row_sampling[i], max_height_sampling[i], width_sampling[i]))
        if return_ims == True:
          imgs.append(resized_window)
  return hot_windows, imgs

def createNominalHeatMap(im, hot_windows):
  heatmap = np.zeros_like(im[:,:,0])
  plus = max(int(255/len(hot_windows)), 1)
  for w in hot_windows:
    #w = (center_col, min_row, height, width)
    heatmap[int(w[1]-w[2]):int(w[1]), int(w[0]-w[3]/2):int(w[0]+w[3]/2)] += plus
  return heatmap

im_fname = 'test_images/test1.jpg'
test_im = mpimg.imread(im_fname)
hot_windows, sub_imgs = findHotWindows(test_im, min_row_sampling, max_height_sampling, width_sampling, step_size_sampling, X_scaler, svc, return_ims=True)

for w in hot_windows:
  drawBox(test_im, w[0], w[1], w[2], w[3], c=[0, 255, 0])

#heatmap = createNominalHeatMap(test_im, hot_windows)

f_num = 0
f_num = showImage(test_im, f_num)
#f_num = showImage(heatmap, f_num)

In [None]:
def extraTrain(imgs, p_folder, n_folder, p_ind=0, n_ind=0):
  for img in imgs:
    plt.imshow(img)
    plt.show()
    while True:
      key_press = input()
      if key_press is 'p':
        cv2.imwrite(p_folder+str(p_ind)+'.jpg', img)
        p_ind=p_ind+1
        break
      elif key_press is 'n':
        cv2.imwrite(n_folder+str(n_ind)+'.jpg', img)
        n_ind=n_ind+1
        break
      else:
        print("Try again")
  return [p_ind, n_ind]

In [None]:
test_filenames = glob.glob('test_images/test*.jpg')

f_num = 0

all_sub_images = []
for fname in test_filenames:
  im = mpimg.imread(fname)
  hot_windows, imgs = findHotWindows(im, min_row_sampling, max_height_sampling, width_sampling, step_size_sampling, X_scaler, svc, return_ims=True)
  all_sub_images = all_sub_images+imgs
  if hot_windows:
    for w in hot_windows:
      drawBox(im, w[0], w[1], w[2], w[3], c=[0, int(255*c), 0], thickness = 5)
  f_num = showImage(im, f_num, title=fname+' Num. Windows: '+str(len(hot_windows)))

  
print(len(all_sub_images))


In [None]:
p_folder = 'train_extra/vehicles/'
n_folder = 'train_extra/non_vehicles/'
p_ind = 0
n_ind = 0
p_ind, n_ind = extraTrain(all_sub_images, p_folder, n_folder, p_ind, n_ind)


In [None]:
vehicle_filenames = glob.glob('train_images_vehicles/*/*.png')
non_vehicle_filenames = glob.glob('train_images_non_vehicles/*/*.png')
extra_vehicle_filenames = glob.glob('train_extra/vehicles/*.jpg')
extra_non_vehicle_filenames = glob.glob('train_extra/non_vehicles/*.jpg')

print(len(extra_vehicle_filenames))
print(len(extra_non_vehicle_filenames))

train_features_vehicles = extractFeatures(extra_vehicle_filenames)
train_features_non_vehicles = extractFeatures(extra_non_vehicle_filenames)

vehicle_labels = np.ones((len(train_features_vehicles)))
non_vehicle_labels = np.zeros((len(train_features_non_vehicles)))
X_data = np.concatenate([train_features_vehicles, train_features_non_vehicles])
y_train = np.concatenate([vehicle_labels, non_vehicle_labels])
X_scaler = StandardScaler().fit(X_data)
X_train = X_scaler.transform(X_data)
weights = np.ones_like(y_train)*2.0

svc.fit(X_train, y_train, weights)


In [None]:
# DBSCAN - Density-Based Spatial Clustering of Applications with Noise
# scikit-learn documentation: http://scikit-learn.org/stable/modules/clustering.html#dbscan
from sklearn.cluster import DBSCAN

# Higher min_samples, stricter classification
min_samples = 2
# Lower eps, stricter classification
eps = 90
db = DBSCAN(eps=eps, min_samples=min_samples)

def boundingWindow(windows):
  if len(windows)==1:
    print("single window")
    return windows
  max_row = np.max(windows[:, 1])
  min_row = np.min(windows[:, 1]-windows[:, 2])
  max_col = np.max(windows[:, 0]+windows[:, 3]/2)
  min_col = np.min(windows[:, 0]-windows[:, 3]/2)
  return [int((max_col+min_col)/2),max_row,max_row-min_row,max_col-min_col]
  
def DBSCANClustering(hot_windows, dbscan_obj, weights=[]):
  centers = []
  weights=np.array(weights)
  for w in hot_windows:
    #w = (center_col, min_row, height, width)
    centers.append([w[1]-w[2]/2, w[0]])
  centers = np.array(centers)
  if weights.shape[0]==0:
    weights = np.ones(centers.shape[0])
  labels = dbscan_obj.fit_predict(centers, sample_weight=weights)
  return [labels, centers]

test_filenames = glob.glob('test_images/test*.jpg')

f_num = 0
for fname in test_filenames:
  _, only_fname = os.path.split(fname)
  im, im_shape = loadImage(fname)
  hot_windows, conf = findHotWindows(im, min_row_sampling, max_height_sampling, width_sampling, step_size_sampling, X_scaler, svc)
  if hot_windows:
    hot_windows = np.array(hot_windows)
    labels, centers = DBSCANClustering(hot_windows, db, weights=conf)
    #labels, centers = DBSCANClustering(hot_windows, db, weights=[])
    num_noise = labels[labels==-1].shape[0]
    unique_labels = set(labels)
    num_clusters = len(unique_labels) - (1 if -1 in labels else 0)
    for l in unique_labels:
      masked_windows = hot_windows[(labels == l), :]
      if l == -1:
        for w in masked_windows:
          drawBox(im, w[0], w[1], w[2], w[3], c=[255, 0, 0], thickness = 5)
      else:
        w = boundingWindow(masked_windows)
        drawBox(im, w[0], w[1], w[2], w[3], c=[0, 255, 0], thickness = 5)
    f_num = showImage(im, f_num, title=only_fname+' Num. Cars: '+str(num_clusters)+' Num. Noise: '+str(num_noise))

In [None]:
from moviepy.editor import VideoFileClip

def pipelineWrapper1(file, im_filepath=False):
  global min_row_sampling
  global max_height_sampling
  global width_sampling
  global step_size_sampling
  global X_scaler
  global svc
  
  if im_filepath == True:
    im = cv2.imread(file)
  else:
    im = file
  
  hot_windows, conf = findHotWindows(im, min_row_sampling, max_height_sampling, width_sampling, step_size_sampling, X_scaler, svc)
  
  for w, c in zip(hot_windows, conf):
    drawBox(im, w[0], w[1], w[2], w[3], c=[0, int(255*c), 0], thickness = 5)

  return im

def pipelineWrapper2(file, im_filepath=False):
  global min_row_sampling
  global max_height_sampling
  global width_sampling
  global step_size_sampling
  global X_scaler
  global svc
  global db
  
  if im_filepath == True:
    im = cv2.imread(file)
  else:
    im = file
  hot_windows = findHotWindows(im, min_row_sampling, max_height_sampling, width_sampling, step_size_sampling, X_scaler, svc)
  
  if hot_windows:
    hot_windows = np.array(hot_windows)
    labels, centers = DBSCANClustering(hot_windows, db)
    unique_labels = set(labels)
    for l in unique_labels:
      if l == -1: continue
      masked_windows = hot_windows[(labels == l), :]
      w = boundingWindow(masked_windows)
      drawBox(im, w[0], w[1], w[2], w[3], c=[0, c, 0], thickness = 5)
    
  return im

output = 'project_output.mp4'
clip1 = VideoFileClip('videos/project_video.mp4')
output_clip = clip1.fl_image(pipelineWrapper1)
%time output_clip.write_videofile(output, audio=False)

