In [1]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from PIL import Image
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

%matplotlib inline

Using TensorFlow backend.


In [2]:
def yolo_filter_with_threshold(box_confidence, boxes, box_class_probs, threshold = 0.6):
    """Drop every box whose best class score is below ``threshold``.

    The per-class score of a box is ``box_confidence * box_class_probs``.
    For each box the highest score along the last axis, and the index of
    the class that produced it, are kept when that score is >= ``threshold``.

    Args:
        box_confidence: objectness tensor, multiplied element-wise against
            ``box_class_probs``.
            NOTE(review): the original notes imply a 19x19x5x1 tensor
            broadcast against 19x19x5x80 -- confirm against the caller.
        boxes: box-coordinate tensor, masked with the same boolean mask as
            the scores.
        box_class_probs: per-class probability tensor; classes are on the
            last axis.
        threshold: minimum best-class score a box needs to survive.

    Returns:
        Tuple ``(scores, boxes, classes)`` holding, for the surviving boxes
        only, the best score, the box coordinates and the best class index.
    """
    # Score of every class for every box.
    class_scores = box_confidence * box_class_probs

    # Reduce over the class axis: the winning score and which class won.
    # Both reductions remove the last axis.
    best_score = K.max(class_scores, axis=-1)
    best_class = K.argmax(class_scores, axis=-1)

    # True where the winning score is high enough to keep the box.
    keep = best_score >= threshold

    # boolean_mask flattens the masked dimensions, leaving one row per kept box.
    return (
        tf.boolean_mask(best_score, keep),
        tf.boolean_mask(boxes, keep),
        tf.boolean_mask(best_class, keep),
    )

In [3]:
def yolo_NMS(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """Apply non-max suppression and return only the selected detections.

    Args:
        scores: score per candidate box.
        boxes: coordinates per candidate box.
        classes: class index per candidate box.
        max_boxes: upper bound on the number of boxes to keep.
        iou_threshold: IoU threshold handed to
            ``tf.image.non_max_suppression``.

    Returns:
        Tuple ``(scores, boxes, classes)`` gathered at the indices chosen
        by non-max suppression.
    """
    # The box limit is passed as an int32 variable, which must be
    # initialized in the Keras session before the graph can be evaluated.
    box_limit = K.variable(max_boxes, dtype=tf.int32)
    session = K.get_session()
    session.run(tf.variables_initializer([box_limit]))

    # Indices of the boxes that survive suppression.
    selected = tf.image.non_max_suppression(boxes, scores, box_limit, iou_threshold)

    # Pick the surviving rows out of each of the three parallel tensors.
    kept_scores = K.gather(scores, selected)
    kept_boxes = K.gather(boxes, selected)
    kept_classes = K.gather(classes, selected)
    return kept_scores, kept_boxes, kept_classes

In [11]:
def yolo_eval(yolo_outputs, image_shape, max_boxes = 10, score_threshold = 0.6, iou_threshold = 0.5):
    """Turn the YOLO head outputs into final detections for one image.

    Pipeline: convert boxes to corner form, drop low-scoring boxes, rescale
    the boxes to the target image, then run non-max suppression.

    Args:
        yolo_outputs: 4-tuple ``(box_confidence, box_xy, box_wh,
            box_class_probs)``.
        image_shape: shape of the image the boxes are scaled to; the
            notebook passes ``(height, width)`` as floats.
        max_boxes: maximum number of boxes kept by non-max suppression.
        score_threshold: minimum best-class score for a box to be kept.
        iou_threshold: IoU threshold used by non-max suppression.

    Returns:
        Tuple ``(scores, boxes, classes)`` for the detections that survive
        both filtering stages.
    """
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # Convert (centre xy, width/height) boxes into corner coordinates.
    # NOTE(review): exact corner ordering is defined by yolo_boxes_to_corners.
    boxes = yolo_boxes_to_corners(box_xy, box_wh)

    # 1st tier filtering: discard boxes whose best class score is too low.
    scores, boxes, classes = yolo_filter_with_threshold(
        box_confidence, boxes, box_class_probs, threshold=score_threshold
    )

    # Rescale the boxes to the resolution of the image being annotated.
    boxes = scale_boxes(boxes, image_shape)

    # 2nd tier filtering: non-max suppression. The caller's limits are
    # forwarded explicitly; previously they were silently ignored and the
    # yolo_NMS defaults were always used.
    scores, boxes, classes = yolo_NMS(
        scores, boxes, classes, max_boxes=max_boxes, iou_threshold=iou_threshold
    )

    return scores, boxes, classes

In [14]:
def predict(sess, image_file):
    """Run detection on one image, save the annotated copy and display it.

    Relies on notebook globals that must be defined before the call:
    ``scores``, ``boxes``, ``classes`` (the graph tensors to evaluate),
    ``yolo_model`` and ``class_names``.

    Args:
        sess: session whose ``run`` evaluates the detection tensors.
        image_file: file name (not a path) of an image inside ``images/``.
            The annotated copy is written under the same name in ``out/``.

    Returns:
        Tuple ``(out_scores, out_boxes, out_classes)`` as returned by
        ``sess.run``.
    """
    # Preprocess the image to the model's 608x608 input size.
    image, image_data = preprocess_image("images/"+image_file, model_image_size=(608, 608))

    # Evaluate the detection tensors; learning phase 0 selects inference mode.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0},
    )

    # One colour per class name, then draw boxes, labels and scores on the image.
    colors = generate_colors(class_names)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)

    # Make sure the output folder exists so the save cannot fail on a fresh checkout.
    os.makedirs("out", exist_ok=True)
    out_path = os.path.join("out", image_file)
    image.save(out_path, quality=90)

    # Opens saved image in default image viewer. COMMENT THESE LINES IF YOU ARE FEEDING LARGE NUMBER OF IMAGES!
    # The context manager releases the file handle once the viewer has been launched.
    with Image.open(out_path) as out_img:
        out_img.show()

    return out_scores, out_boxes, out_classes

In [6]:
# Load the class-name list and the anchor definitions from the model_data folder.
# class_names is later used for len(), generate_colors() and draw_boxes();
# anchors is passed to yolo_head().
# NOTE(review): the original note called the anchors "aspect ratios" -- confirm
# what read_anchors actually returns.
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")

In [7]:
# Load the saved Keras YOLO model from disk and print its layer-by-layer summary.
yolo_model = load_model("model_data/yolo.h5")
yolo_model.summary()
# Input  : (None, 608, 608, 3) -- matches the input_1 row of the summary.
# Output : (None, 19, 19, 5, 85) according to the original note.
# TODO(review): confirm against the last layer of the printed summary.

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 608, 608, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 608, 608, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 608, 608, 32) 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 608, 608, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_poolin



In [9]:
# Get all image names in the images folder. My images were in '.png'.
# Change it to whatever extension you are using.
# os.listdir returns names in arbitrary order, so the result is sorted to make
# the processing order (and the printed log) reproducible between runs.
files = os.listdir("images/")
images = sorted(name for name in files if name.endswith('.png'))

In [16]:
sess = K.get_session()

# yolo_head depends only on the model output, the anchors and the number of
# classes, so it is built once instead of being rebuilt for every image.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

for img in images:
    print("\nWorking on "+img)

    # Only the dimensions are needed here; the context manager closes the file.
    with Image.open("images/"+img) as im:
        width,height = im.size

    # yolo_eval takes the target shape as (height, width). The resulting
    # scores/boxes/classes are module-level names that predict() reads, so
    # they must be assigned before predict() is called.
    # NOTE(review): this still builds new graph ops for every image because the
    # image shape differs per file -- consider a placeholder if that matters.
    scores, boxes, classes = yolo_eval(yolo_outputs, (float(height),float(width)))
    out_scores, out_boxes, out_classes = predict(sess, img)

Working on dashcam_img2.png
car 0.67 (895, 477) (1234, 703)
car 0.69 (1534, 494) (1915, 793)
car 0.79 (1278, 522) (1498, 619)
Working on dashcam_img1.png
traffic light 0.60 (1006, 369) (1033, 410)
car 0.66 (725, 468) (1164, 806)
truck 0.79 (1183, 380) (1904, 831)
car 0.80 (440, 530) (627, 598)
