In [1]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.models import load_model
from yad2k.utils.yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, letterbox_image, draw_boxes, scale_boxes
from yad2k.keras_yolo import yolo_boxes_and_scores, yolo_head
from lane_utils import *
import time
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## 2 - YOLO

To keep things simple, we flatten the last two dimensions: the CNN output of shape (19, 19, 5, 85) is flattened to (19, 19, 425).


### Part 1 - Example Filtering with a threshold on class scores

The output result may contain several rectangles that are false positives or overlap, so we need to find a way to reduce them.
The first attempt to reduce these rectangles is to filter them by threshold.


**Input arguments**: 
- `box_confidence`: tensor of shape $(19 \times 19, 5, 1)$ containing $p_c$ (confidence probability that there's some object) for each of the 5 boxes predicted in each of the 19x19 cells.
- `boxes`: tensor of shape $(19 \times 19, 5, 4)$ containing $(b_x, b_y, b_h, b_w)$ for each of the 5 boxes per cell.
- `box_class_probs`: tensor of shape $(19 \times 19, 5, 80)$ containing the detection probabilities $(c_1, c_2, ... c_{80})$ for each of the 80 classes for each of the 5 boxes per cell.



In [2]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    """Keep only the boxes whose best class score reaches `threshold`.

    Arguments:
    box_confidence -- tensor of shape (19, 19, 5, 1), objectness p_c of every predicted box
    boxes -- tensor of shape (19, 19, 5, 4), coordinates of every predicted box
    box_class_probs -- tensor of shape (19, 19, 5, 80), per-class probabilities of every box
    threshold -- real value; a box is discarded when its highest class score is below it

    Returns:
    scores -- tensor of shape (None,), best class score of each kept box
    boxes -- tensor of shape (None, 4), coordinates of each kept box
    classes -- tensor of shape (None,), index of the best class of each kept box

    Note: the leading "None" dimension is the number of boxes that pass the threshold,
    which is only known when the graph is run (e.g. scores has shape (10,) for 10 boxes).
    """

    # Score of every class for every box: objectness times class probability.
    per_class_scores = box_confidence * box_class_probs

    # Winning class of each box and the score that class obtained.
    best_class = K.argmax(per_class_scores, axis=-1)
    best_score = K.max(per_class_scores, axis=-1)

    # Boolean mask with the same shape as best_score; True marks a box to keep.
    keep = best_score >= threshold

    # Select the surviving entries; boolean_mask flattens the masked dimensions.
    kept_scores = tf.boolean_mask(best_score, keep, name='score_mask')
    kept_boxes = tf.boolean_mask(boxes, keep, name='box_mask')
    kept_classes = tf.boolean_mask(best_class, keep, name='classes_mask')

    return kept_scores, kept_boxes, kept_classes

### Part 2 : Non-max suppression ###

Even after filtering by threshold, we still have a lot of overlapping boxes. The second filtering step is the non-max suppression algorithm.

* Discard all boxes with $Pc <= 0.6$  
* While there are any remaining boxes : 
    * Pick the box with the largest $Pc$
    * Output that as a prediction
    * Discard any remaining boxes with $IOU>=0.5$ with the box output in the previous step

<img src="notebook_images/nms_algo.jpg" style="width:25%;height:25%;">
<caption>Example of the non-max suppression algorithm: on input the algorithm receives 4 overlapping bounding boxes, and the output contains only one</caption>


Non-max suppression uses the very important function called **"Intersection over Union"**, or IoU.
<img src="notebook_images/iou.png" style="width:500px;height:400px;">

**Below is implementation of iou**:

In [3]:

def iou(box1, box2):
    """Compute the intersection over union (IoU) of two axis-aligned boxes.

    Arguments:
    box1 -- first box, sequence with corner coordinates (x1, y1, x2, y2)
    box2 -- second box, sequence with corner coordinates (x1, y1, x2, y2)

    Returns:
    The ratio intersection_area / union_area. It is 0 when the boxes do not
    overlap (including when they only touch along an edge) and 0.0 when the
    union has no area at all (both boxes degenerate).
    """

    # Corners of the candidate intersection rectangle.
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])

    # Clamp at zero: when the boxes are disjoint the "rectangle" has a negative
    # extent, which must count as no overlap instead of a positive area.
    inter_width = max(xi2 - xi1, 0)
    inter_height = max(yi2 - yi1, 0)
    inter_area = inter_width * inter_height

    # Union(A, B) = A + B - Inter(A, B)
    box1_area = abs(box1[0] - box1[2]) * abs(box1[1] - box1[3])
    box2_area = abs(box2[0] - box2[2]) * abs(box2[1] - box2[3])
    union_area = box1_area + box2_area - inter_area

    # Two zero-area boxes have an empty union; avoid dividing by zero.
    if union_area <= 0:
        return 0.0

    return inter_area / union_area

In [4]:
# Quick sanity check of iou() on two partially overlapping boxes.
box1 = (2, 1, 4, 3)
box2 = (1, 2, 3, 4)
print("iou = {}".format(iou(box1, box2)))

iou = 0.14285714285714285


By computing the IoU, we have composed the third part of the non-max suppression algorithm. Now, let's combine everything into one:

### 2.4 Wrapping up the filtering

Let's calculate the whole algorithm : 
* Get Yolo CNN output
* Discard all boxes with $Pc <= 0.6$  
* While there are any remaining boxes : 
    * Pick the box with the largest $Pc$
    * Output that as a prediction
    * Discard any remaining boxes with $IOU>=0.5$ with the box output in the previous step

In [5]:

def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Applies Non-max suppression (NMS) to set of boxes

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering

    Returns:
    scores -- tensor of shape (None,), predicted score for each kept box
    boxes -- tensor of shape (None, 4), coordinates of each kept box
    classes -- tensor of shape (None,), predicted class for each kept box

    Note: the "None" dimension of the outputs is the number of boxes kept by NMS,
    which is at most max_boxes. The inputs are only gathered along their first
    axis, so the layout of each row is unchanged.
    """

    # A constant is sufficient for the box limit (same approach as yolo_eval);
    # it needs no variable initialisation and no session access.
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # Indices of the boxes that survive non-max suppression.
    nms_indices = tf.image.non_max_suppression(boxes=boxes,
                                               scores=scores,
                                               max_output_size=max_boxes_tensor,
                                               iou_threshold=iou_threshold, name='nms_indices')

    # Keep only the selected entries of each input tensor.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes

In [6]:

def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Turn the raw YOLO output layers into final boxes, scores and class ids.

    Every output layer is decoded with yolo_boxes_and_scores, the results of all
    layers are concatenated, scores below `score_threshold` are masked out and
    non-max suppression is then applied separately for each class.

    Arguments:
    yolo_outputs -- sequence of output tensors of the YOLO model, one per detection layer
    anchors -- anchor boxes, indexed with a list of row indices per layer
    num_classes -- integer, number of classes the model predicts
    image_shape -- shape of the original image, forwarded to yolo_boxes_and_scores
    max_boxes -- integer, maximum number of boxes kept per class by NMS
    score_threshold -- real value, minimum class score for a box to be considered
    iou_threshold -- real value, "intersection over union" threshold used by NMS

    Returns:
    boxes_ -- tensor of shape (None, 4), coordinates of the kept boxes
    scores_ -- tensor of shape (None,), score of each kept box
    classes_ -- int32 tensor of shape (None,), class index of each kept box
    """
    num_layers = len(yolo_outputs)
    # Which anchors belong to which output layer.
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]  # default setting
    # Network input size derived from the first output's spatial size times 32.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode each detection layer and stack the results.
    layer_boxes, layer_scores = [], []
    for layer_idx in range(num_layers):
        decoded_boxes, decoded_scores = yolo_boxes_and_scores(
            yolo_outputs[layer_idx],
            anchors[anchor_mask[layer_idx]],
            num_classes, input_shape, image_shape)
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    all_boxes = K.concatenate(layer_boxes, axis=0)
    all_scores = K.concatenate(layer_scores, axis=0)

    keep_mask = all_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # Per-class thresholding followed by per-class non-max suppression.
    kept_boxes, kept_scores, kept_classes = [], [], []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_mask = keep_mask[:, class_id]
        candidate_boxes = tf.boolean_mask(all_boxes, class_mask)
        candidate_scores = tf.boolean_mask(all_scores[:, class_id], class_mask)
        selected = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidate_boxes = K.gather(candidate_boxes, selected)
        candidate_scores = K.gather(candidate_scores, selected)
        # One class id per surviving box.
        class_ids = K.ones_like(candidate_scores, 'int32') * class_id
        kept_boxes.append(candidate_boxes)
        kept_scores.append(candidate_scores)
        kept_classes.append(class_ids)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)

    return boxes_, scores_, classes_

## Part 3 :  YOLO V3. Model

For the purpose of this project, I'm using a pretrained weights for Yolo V3.
You can download the weights <a href = "https://pjreddie.com/media/files/yolov3.weights"> here</a>

In [7]:
# Session of the Keras backend; later cells run the detection graph through it (sess.run / predict).
sess = K.get_session()

### 3.1 - Defining classes, anchors and image shape.

We have gathered the information about the 80 classes and the 9 anchor boxes in two files "coco_classes.txt" and "yolo_anchors.txt". Let's load these quantities into the model by running the next cell. 

The car detection dataset has 720x1280 images, which we've pre-processed into 608x608 images. 

# Reading class/anchors data
Below we read the class labels and the anchor boxes pre-computed with the K-means algorithm.

To determine the priors, YOLOv3 applies k-means clustering and pre-selects 9 clusters. For COCO, the widths and heights of the anchors are (10×13), (16×30), (33×23), (30×61), (62×45), (59×119), (116×90), (156×198), (373×326). These 9 priors are grouped into 3 different groups according to their scale. Each group is assigned to a specific feature map for detecting objects.

In [8]:
# Load the pre-computed anchor boxes and the class labels from the model_data folder.
anchors  = read_anchors('model_data/yolo_anchors.txt')
class_names = read_classes("model_data/coco_classes.txt")

# Show the anchors that were read (one width/height pair per row in the output below).
print(anchors)
# NOTE(review): image_shape is not referenced again in the cells visible here;
# the graph is fed through the input_image_shape placeholder instead -- confirm it is still needed.
image_shape = (720., 1280.)    

[[ 10.  13.]
 [ 16.  30.]
 [ 33.  23.]
 [ 30.  61.]
 [ 62.  45.]
 [ 59. 119.]
 [116.  90.]
 [156. 198.]
 [373. 326.]]


### 3.2 - Loading a pretrained model

For the purpose of this project, I'm using a pretrained weights for Yolo V3.
You can download the weights <a href = "http://pjreddie.com/media/files/yolo.weights"> here</a>

These weights are generated using pure numpy arrays, but luckily the authors of YOLO provide configuration files containing the full model information, which means we can convert the weights for use in any high-level programming language.
Allan Zelener wrote a function for weights conversion that works well for YOLO V2, and I made small modifications for YOLO V3. References are at the end of this notebook.

In [9]:
# Load the converted Keras model from "yolo.h5" (path is relative to the working directory).
yolo_model = load_model("yolo.h5")




This loads the weights of a trained YOLO model. Here's a summary of the layers your model contains.

In [10]:
# Print the layer-by-layer summary of the loaded model.
yolo_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, None, None, 3 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, None, None, 3 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
zero_paddi

**Note**: Before we go further, I’d like to point out that stride of the network, or a layer is defined as the ratio by which it downsamples the input. In the following examples, I will assume we have an input image of size 416 x 416.

YOLO v3 makes prediction at three scales, which are precisely given by downsampling the dimensions of the input image by 32, 16 and 8 respectively.

### 3.3 - Convert output of the model to usable bounding box tensors

yolo_outputs gave you all the predicted boxes of yolo_model in the correct format. You're now ready to perform filtering and select only the best boxes. Lets now call yolo_eval, which you had previously implemented, to do this.


In [11]:
from PIL import Image, ImageFont, ImageDraw
import os


# Test image used to check the detection graph once.
image = Image.open('test/straight_lines1.jpg')
image_data = preprocess_image(image)

# Placeholder fed at run time with the original image size as [height, width].
input_image_shape = K.placeholder(shape=(2, ))

# Detection thresholds. The IoU threshold gets its own name so that it does not
# overwrite the iou() function defined earlier in the notebook, and the value
# below is the one that is actually handed to yolo_eval.
score = 0.6
iou_threshold = 0.45

# Build the post-processing graph; predict() reuses these three tensors.
boxes, scores, classes = yolo_eval(yolo_model.output, anchors,
                len(class_names), input_image_shape,
                score_threshold=score, iou_threshold=iou_threshold)

print(boxes) 
print(scores)
print(classes)

# Run the graph once on the test image. learning_phase 0 selects inference mode.
out_boxes, out_scores, out_classes = sess.run(
    [boxes, scores, classes],
    feed_dict={
        yolo_model.input: image_data,
        input_image_shape: [image.size[1], image.size[0]],
        K.learning_phase(): 0
    })



Tensor("concat_11:0", shape=(?, 4), dtype=float32)
Tensor("concat_12:0", shape=(?,), dtype=float32)
Tensor("concat_13:0", shape=(?,), dtype=int32)


You added `yolo_outputs` to your graph. This set of 4 tensors is ready to be used as input by your `yolo_eval` function.

### 3.5 - Run the graph on an image

Let the fun begin. You have created a (`sess`) graph that can be summarized as follows:

1. <font color='purple'> yolo_model.input </font> is given to `yolo_model`. The model is used to compute the output <font color='purple'> yolo_model.output </font>
2. <font color='purple'> yolo_model.output </font> is processed by `yolo_head`. It gives you <font color='purple'> yolo_outputs </font>
3. <font color='purple'> yolo_outputs </font> goes through a filtering function, `yolo_eval`. It outputs your predictions: <font color='purple'> scores, boxes, classes </font>

**Exercise**: Implement predict() which runs the graph to test YOLO on an image.
You will need to run a TensorFlow session, to have it compute `scores, boxes, classes`.

The code below also uses the following function:
```python
image, image_data = preprocess_image("images/" + image_file, model_image_size = (608, 608))
```
which outputs:
- image: a python (PIL) representation of your image used for drawing boxes. You won't need to use it.
- image_data: a numpy-array representing the image. This will be the input to the CNN.

**Important note**: when a model uses BatchNorm (as is the case in YOLO), you will need to pass an additional placeholder in the feed_dict {K.learning_phase(): 0}.

In [12]:

def predict(sess, image):
    """
    Run the YOLO graph on a single frame and draw the detections on it.

    Arguments:
    sess -- your tensorflow/Keras session containing the YOLO graph
    image -- frame as an array; it is cast to uint8 and wrapped into an RGB PIL image
             (presumably height x width x 3 -- confirm against the caller)

    Returns:
    out_scores -- scores of the predicted boxes, fetched from the global `scores` tensor
    out_boxes -- coordinates of the predicted boxes, fetched from the global `boxes` tensor
    out_classes -- class index of the predicted boxes, fetched from the global `classes` tensor
    drawer -- the value returned by draw_boxes for this frame (pipeline_final blends it
              with cv2.addWeighted, so presumably the annotated frame -- TODO confirm)

    Note: relies on the notebook globals yolo_model, input_image_shape, boxes, scores,
    classes and class_names created in earlier cells.
    """
    # Convert the raw frame into the PIL image the helpers work with.
    frame = Image.fromarray(image.astype('uint8'), 'RGB')

    image_data = preprocess_image(frame, model_image_size = (416, 416))

    # PIL reports size as (width, height); the placeholder is fed [height, width].
    width, height = frame.size
    feed = {
        yolo_model.input: image_data,
        input_image_shape: [height, width],
        K.learning_phase(): 0
    }
    out_boxes, out_scores, out_classes = sess.run([boxes, scores, classes],
                                                  feed_dict=feed)

    # Generate colors for drawing bounding boxes, then draw them on the frame.
    box_colors = generate_colors(class_names)
    drawer = draw_boxes(frame, out_scores, out_boxes, out_classes, class_names, box_colors)

    return out_scores, out_boxes, out_classes, drawer

Run the following cell on the "test.jpg" image to verify that your function is correct.

In [19]:
def pipeline_final(img):
    """Process one video frame: detect objects, overlay the lane and add diagnostics.

    Arguments:
    img -- one video frame, passed to predict() and (as a copy) to pipeline_lanes()

    Returns:
    result -- the frame returned by predict() blended with the lane overlay, with
              four diagnostic images and two FPS text lines added on top
    """

    # perf_counter is meant for measuring intervals; unlike time.time() it is not
    # affected by system clock adjustments and has a fine resolution.
    start_cars = time.perf_counter()
    out_scores, out_boxes, out_classes, image = predict(sess, img)
    elapsed_cars = time.perf_counter() - start_cars

    start_lanes = time.perf_counter()
    # Only the overlay and the diagnostic images are needed here; the fit values
    # returned by pipeline_lanes are ignored.
    initial_lane, _, _, _, _, _, filtered_image, warped, deep_search, out_img = pipeline_lanes(np.copy(img))
    result = cv2.addWeighted(image, 1, initial_lane, 0.3, 1)

    result = add_diagnostic_image(result, filtered_image, 0)
    result = add_diagnostic_image(result, warped, 1)
    result = add_diagnostic_image(result, deep_search, 2)
    result = add_diagnostic_image(result, out_img, 3)
    elapsed_lanes = time.perf_counter() - start_lanes

    # Guard against a zero interval so a very fast frame cannot raise ZeroDivisionError.
    fps_lanes = 1 / elapsed_lanes if elapsed_lanes > 0 else float('inf')
    fps_cars = 1 / elapsed_cars if elapsed_cars > 0 else float('inf')
    fps_text_cars = "FPS cars : %.2f" % fps_cars
    fps_text_lanes = "FPS lanes : %.2f" % fps_lanes

    result = add_diagnostic_text(result, fps_text_lanes, 4, offset=50)
    result = add_diagnostic_text(result, fps_text_cars, 4, offset=90)

    return result




In [20]:
def pipeline_lanes(img):
    """Detect the lane on one frame, reusing the fit remembered from the previous call.

    State is kept in the module-level list `caches`, which holds
    [left_fit, right_fit, is_blind] once a frame has been processed and must
    exist (e.g. as an empty list) before the first call.

    Arguments:
    img -- the frame to process; handed to filter_image, the perspective
           transform and the inverse perspective transform

    Returns:
    A 10-tuple: (result, left_fit, right_fit, left_fitx, right_fitx, ploty,
    filtered_image, warped, deep_search, out_img), where `result` is the value
    returned by inverse_perspective_transform and the rest are the intermediate
    values produced by the helpers called below.
    """

    global caches 
    
    # Defaults used when nothing has been cached yet.
    is_blind = False
    left_fit, right_fit = None, None

    # Start from the values stored by the previous call, if any.
    if caches:
        left_fit, right_fit, is_blind  = caches
        
    filtered_image = filter_image(img, is_blind)
       
    # `filled` is not used further in this function.
    warped, filled = perspective_transform_with_filled_area(img, filtered_image)
    
    # This call runs on every frame: it provides the initial fit for the very first
    # frame and the `deep_search` image that is returned for diagnostic purposes.
    ploty, left_fitx, right_fitx, left_fit, right_fit, deep_search = get_lane_rectangles(warped, left_fit, right_fit, is_blind)
    # Seed the cache with this fit only when it is still empty.
    if not caches : 
        caches = [left_fit, right_fit, is_blind]

    # NOTE(review): after the seeding above `caches` is a three-element list, so this
    # condition looks like it is always true at this point -- confirm. The fit is
    # re-read from the cache (replacing the values just returned by
    # get_lane_rectangles), refined by get_next_frame_lines, and stored back.
    if caches : 
        left_fit, right_fit, is_blind  = caches
        out_img, ploty, left_fitx, right_fitx, left_fit, right_fit, is_blind = get_next_frame_lines(warped, left_fit, right_fit, is_blind)
        caches = [left_fit, right_fit, is_blind]

            

    result = inverse_perspective_transform(img, warped, left_fitx, right_fitx, ploty)
    

    return result, left_fit, right_fit, left_fitx, right_fitx, ploty, filtered_image, warped, deep_search,out_img

In [None]:
from moviepy.editor import VideoFileClip

# Start with an empty lane-fit cache; pipeline_lanes reads and updates this global.
caches=[]
output = 'custom_output.mp4'
clip1 = VideoFileClip("project_video.mp4")
# Apply pipeline_final to the frames of the clip, then write the result without audio.
white_clip = clip1.fl_image(pipeline_final) 
%time white_clip.write_videofile(output, audio=False)

[MoviePy] >>>> Building video custom_output.mp4
[MoviePy] Writing video custom_output.mp4


  1%|▏         | 11/744 [00:01<01:57,  6.22it/s]