In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Load (or reload) the autoreload extension. Mode 2 re-imports modules before
# each cell executes, so edits to imported .py files are picked up without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Detect cars in each frame of a video using a pretrained neural network

In [None]:
import os
import pdb

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from keras.models import load_model
from moviepy.editor import VideoFileClip

In [None]:
# Load the saved Keras model from disk. `model` is read as a global by the
# prediction cells below (the single-image check and search_windows).
model = load_model('model.h5')

In [None]:
filename = './data/vehicles/3.png'
image = cv2.imread(filename)
# cv2.imread signals a missing/unreadable file by returning None instead of
# raising, so fail here with a clear message rather than later in the model.
if image is None:
    raise FileNotFoundError('Could not read image: ' + filename)

# OpenCV loads colour images in BGR order while matplotlib interprets arrays as
# RGB. Convert only the copy that is displayed; `image` itself stays exactly as
# loaded because the next cell feeds it to the model.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));

In [None]:
# Score the sample image. Indexing with np.newaxis adds the leading batch axis
# that model.predict is given here.
prediction = model.predict(image[np.newaxis, ...], batch_size=1)
# The prediction holds a single value; .item() extracts it as a Python scalar.
# (Calling float() on an array with ndim > 0 is deprecated since NumPy 1.25.)
np.asarray(prediction).item()

In [None]:
def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """Return a copy of ``img`` with one rectangle drawn per bounding box.

    Args:
        img: Image array to annotate. It is copied first and never modified.
        bboxes: Iterable of boxes; element 0 and element 1 of each box are the
            two corner points handed to ``cv2.rectangle``.
        color: Colour tuple passed straight to ``cv2.rectangle``.
        thick: Line thickness passed straight to ``cv2.rectangle``.

    Returns:
        The annotated copy of ``img``.
    """
    # Work on a copy so the caller's frame stays untouched.
    canvas = np.copy(img)
    for box in bboxes:
        corner_a, corner_b = box[0], box[1]
        cv2.rectangle(canvas, corner_a, corner_b, color, thick)
    return canvas

In [None]:
def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None),
                 xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """List the corner coordinates of overlapping windows tiling a region.

    Args:
        img: Image array. Only ``img.shape`` is read, and only for bounds that
            are left as ``None``.
        x_start_stop: ``(start, stop)`` of the search region along x. A ``None``
            start means 0; a ``None`` stop means ``img.shape[1]``.
        y_start_stop: ``(start, stop)`` of the search region along y. A ``None``
            start means 0; a ``None`` stop means ``img.shape[0]``.
        xy_window: Window size as ``(width, height)``.
        xy_overlap: Fraction of overlap between neighbouring windows as
            ``(x_fraction, y_fraction)``.

    Returns:
        A list of ``((startx, starty), (endx, endy))`` tuples, ordered row by
        row (all x positions for the first y position, then the next, ...).
    """
    # Fall back to the full image extent for every bound left as None.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Span of the region to be searched.
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Pixels advanced per step in x/y. The builtin int() is used because the
    # np.int alias no longer exists in current NumPy releases.
    nx_pix_per_step = int(xy_window[0] * (1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1] * (1 - xy_overlap[1]))

    # Number of windows per axis; the buffer is the overlapping part of a
    # window, subtracted so the last window does not run past the stop bound.
    nx_buffer = int(xy_window[0] * xy_overlap[0])
    ny_buffer = int(xy_window[1] * xy_overlap[1])
    nx_windows = int((xspan - nx_buffer) / nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer) / ny_pix_per_step)

    window_list = []
    for ys in range(ny_windows):
        starty = ys * ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs * nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list

In [None]:
def search_windows(img, windows, threshold=0.9):
    """Return the windows that the global ``model`` scores above ``threshold``.

    Each window is cropped from ``img``, resized to 64x64 and scored. All crops
    are sent to ``model.predict`` in one batched call instead of one call per
    window, which avoids the per-call overhead for every window of a frame.

    Args:
        img: Image array to crop the windows from.
        windows: Iterable of ``((startx, starty), (endx, endy))`` windows.
        threshold: Minimum score (exclusive) for a window to be kept.

    Returns:
        A list with the windows whose score is greater than ``threshold``, in
        their original order. Empty if ``windows`` is empty.
    """
    windows = list(windows)
    if not windows:
        # Nothing to score; also avoids stacking an empty batch.
        return []

    patches = np.stack([
        cv2.resize(img[window[0][1]:window[1][1], window[0][0]:window[1][0]], (64, 64))
        for window in windows
    ])
    # One score per patch; ravel flattens the prediction array to 1-D so each
    # score lines up with its window.
    scores = np.ravel(model.predict(patches))

    return [window for window, score in zip(windows, scores) if score > threshold]

In [None]:
# Nominal frame size of the project video. process_image reads the real size
# from each frame; these are kept for any other cell that still refers to them.
h = 720
w = 1280
# Horizontal margin (in pixels) left unsearched at both sides of the frame.
w_margin = 10
def process_image(img):
    """Detect cars in one frame and return a copy with boxes drawn on them.

    The search region spans the frame width minus ``w_margin`` on each side,
    and runs from row 150 down to the bottom of the frame. It is derived from
    ``img.shape`` so the windows stay inside frames of any size.

    Args:
        img: A single colour frame as an array whose first two axes are
            (rows, columns).

    Returns:
        A copy of ``img`` with a rectangle around every window classified as a
        car.
    """
    # TODO: Confirm the frames passed in here use the same channel order as the
    # images the network was trained on (moviepy delivers RGB frames, while
    # cv2.imread loads BGR; plt.imread differs again for PNG value ranges).
    frame_h, frame_w = img.shape[:2]
    windows = slide_window(img, (w_margin, frame_w - w_margin), (150, frame_h),
                           xy_window=(100, 100))
    car_boxes = search_windows(img, windows)
    img_with_cars = draw_boxes(img, car_boxes)
    return img_with_cars

In [None]:
output_file = 'output_images/video.mp4'
# input_clip = VideoFileClip('test_video.mp4') # Short video
input_clip = VideoFileClip('project_video.mp4').subclip(29,33) # Long video
output_clip = input_clip.fl_image(process_image) # NOTE: this function expects color images
%time output_clip.write_videofile(output_file, audio=False)

input_clip.reader.close()
input_clip.audio.reader.close_proc()

output_clip.reader.close()
output_clip.audio.reader.close_proc()

del input_clip
del output_clip