<a href="https://colab.research.google.com/github/arnavvats/object-detection/blob/master/yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Fetch the YAD2K sources (cloned into ./yad2k) and download one sample image
# (saved as 0001.jpg in the current working directory).
!git clone https://github.com/allanzelener/yad2k.git
!curl -O https://raw.githubusercontent.com/enggen/Deep-Learning-Coursera/master/Convolutional%20Neural%20Networks/Week3/Car%20detection%20for%20Autonomous%20Driving/images/0001.jpg
  

Cloning into 'yad2k'...
remote: Enumerating objects: 243, done.[K
remote: Total 243 (delta 0), reused 0 (delta 0), pack-reused 243[K
Receiving objects: 100% (243/243), 2.35 MiB | 6.52 MiB/s, done.
Resolving deltas: 100% (106/106), done.


In [0]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model


In [0]:
import colorsys
import imghdr
import os
import random
from keras import backend as K

import numpy as np
from PIL import Image, ImageDraw, ImageFont

def read_classes(classes_path):
  """Load class labels from a text file holding one label per line.

  Args:
    classes_path: path of the text file to read.

  Returns:
    A list of strings, one per line of the file, each stripped of leading
    and trailing whitespace (including the newline).
  """
  with open(classes_path) as handle:
    return [line.strip() for line in handle]

def read_anchors(anchors_path):
  """Load anchor box sizes from a comma-separated text file.

  Only the first line of the file is read. It must contain an even number of
  comma-separated floats, which are grouped pairwise.

  Args:
    anchors_path: path of the anchors text file.

  Returns:
    A numpy array of shape (num_anchors, 2).

  Raises:
    ValueError: if a field is not a valid float or the number of values
      cannot be reshaped into pairs.
  """
  with open(anchors_path) as f:
    # The original code split `anchors` before it was ever assigned; the
    # values have to come from the file itself.
    first_line = f.readline()
  values = [float(x) for x in first_line.split(',')]
  return np.array(values).reshape(-1, 2)
  
def generate_colors(class_names):
  """Build one distinct RGB color per class for drawing bounding boxes.

  Hues are spread evenly around the HSV wheel (full saturation and value),
  converted to RGB and scaled to integer channels in 0..255, which is the
  form PIL drawing calls accept for `outline` / `fill`.

  Args:
    class_names: sequence of class labels; only its length is used.

  Returns:
    A list of (r, g, b) integer tuples, len(class_names) long, shuffled with
    a fixed seed so the class-to-color mapping is the same on every call.
  """
  count = len(class_names)
  hsv_tuples = [(x / count, 1., 1.) for x in range(count)]
  colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
  # colorsys yields floats in [0, 1]; convert to 0..255 integer channels.
  colors = [tuple(int(channel * 255) for channel in rgb) for rgb in colors]
  random.seed(0)  # fixed seed -> reproducible shuffle order
  random.shuffle(colors)  # decorrelate adjacent classes
  random.seed(None)  # restore non-deterministic seeding for other users of `random`
  return colors

def scale_boxes(boxes, image_shape):
  """Multiply box coordinates by the image dimensions.

  Args:
    boxes: tensor of boxes whose last axis has 4 entries; each entry is
      multiplied by (height, width, height, width) respectively.
    image_shape: indexable whose element 0 is the height and element 1 the
      width.

  Returns:
    The scaled boxes tensor.
  """
  img_h, img_w = image_shape[0], image_shape[1]
  # Shape [1, 4] so the factor broadcasts across every box row.
  scale = K.reshape(K.stack([img_h, img_w, img_h, img_w]), [1, 4])
  return boxes * scale

def preprocess_image(image_path, model_image_size):
  """Load an image and build the normalized batch array fed to the model.

  Args:
    image_path: path of the image file to open.
    model_image_size: 2-element sequence; it is reversed before being passed
      to `Image.resize` (presumably (height, width) of the model input --
      confirm against the caller).

  Returns:
    A tuple (image, image_data): the original PIL image (not resized), and a
    float32 numpy array of the resized image scaled to [0, 1] with a leading
    batch axis of size 1.
  """
  # The original body referenced an undefined `img_path`; use the parameter.
  image = Image.open(image_path)
  resized_image = image.resize(tuple(reversed(model_image_size)), Image.BICUBIC)
  image_data = np.array(resized_image, dtype = 'float32')
  image_data /= 255.
  image_data = np.expand_dims(image_data, 0)  # Add batch dimension
  return image, image_data

def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors):
  """Draw labelled bounding boxes onto `image` in place.

  Args:
    image: PIL image to draw on; it is modified.
    out_scores: per-detection scores, indexed like `out_boxes`.
    out_boxes: per-detection boxes, each unpacked as (top, left, bottom, right).
    out_classes: per-detection class indices into `class_names` and `colors`.
    class_names: list of class labels.
    colors: list of colors, one per class, passed to PIL as outline/fill.

  Returns:
    None. Each label and its clipped box corners are also printed.
  """
  font = ImageFont.truetype(font = 'font/FiraMono-Medium.otf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
  # Outline thickness grows with the image size.
  thickness = (image.size[0] + image.size[1]) // 300
  draw = ImageDraw.Draw(image)

  for i, c in reversed(list(enumerate(out_classes))):
    predicted_class = class_names[c]
    box = out_boxes[i]
    score = out_scores[i]

    label = '{} {:.2f}'.format(predicted_class, score)
    # NOTE(review): ImageDraw.textsize was removed in Pillow 10; confirm the
    # Pillow version this notebook runs against.
    label_size = draw.textsize(label, font)

    # Round to the nearest pixel and clip the box to the image bounds.
    top, left, bottom, right = box
    top = max(0, np.floor(top + 0.5).astype('int32'))
    left = max(0, np.floor(left + 0.5).astype('int32'))
    bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
    right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
    print(label, (left, top), (right, bottom))

    # Put the label above the box when it fits, otherwise just inside it.
    if top - label_size[1] >= 0:
      text_origin = np.array([left, top - label_size[1]])
    else:
      text_origin = np.array([left, top + 1])

    # A thick outline is drawn as nested 1-pixel rectangles. A dedicated loop
    # variable avoids clobbering the detection index `i`.
    for offset in range(thickness):
      draw.rectangle([left + offset, top + offset, right - offset, bottom - offset], outline = colors[c])

    # Filled background for the label, then the label text itself.
    draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill = colors[c])
    draw.text(text_origin, label, fill = (0, 0, 0), font = font)

  del draw
      
  

In [0]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
  """Keep only the boxes whose best class score exceeds `threshold`.

  The score of a box for a class is box_confidence * box_class_probs; each
  box is assigned the class with the highest score.

  Args:
    box_confidence: tensor of per-box confidences.
    boxes: tensor of box coordinates.
    box_class_probs: tensor of per-box class probabilities (classes on the
      last axis).
    threshold: a box is kept when its best class score is strictly greater
      than this value.

  Returns:
    A tuple (scores, boxes, classes) holding the best class score, the
    coordinates and the class index of each retained box.
  """
  box_scores = box_confidence * box_class_probs
  box_classes = K.argmax(box_scores, axis = -1)
  # The original assigned `box_classes_scores` but read `box_class_scores`.
  box_class_scores = K.max(box_scores, axis = -1)
  filtering_mask = box_class_scores > threshold
  # Apply the same mask to all three tensors so they stay aligned.
  scores = tf.boolean_mask(box_class_scores, filtering_mask)
  boxes = tf.boolean_mask(boxes, filtering_mask)
  classes = tf.boolean_mask(box_classes, filtering_mask)

  return scores, boxes, classes

In [0]:
def iou(box1, box2):
  """Intersection over union of two axis-aligned boxes.

  Args:
    box1: indexable (x1, y1, x2, y2) with (x1, y1) the min corner and
      (x2, y2) the max corner.
    box2: second box, same layout as `box1`.

  Returns:
    intersection_area / union_area, or 0.0 when the union area is not
    positive (e.g. both boxes are degenerate), instead of dividing by zero.
  """
  xi1 = max(box1[0], box2[0])
  yi1 = max(box1[1], box2[1])
  xi2 = min(box1[2], box2[2])
  yi2 = min(box1[3], box2[3])
  # Clamp at 0 so disjoint boxes yield an empty intersection.
  inter_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0)
  box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
  box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
  union_area = box1_area + box2_area - inter_area
  if union_area <= 0:
    return 0.0
  return inter_area / union_area

In [0]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
  """Apply non-max suppression and return the surviving detections.

  Args:
    scores: tensor of per-box scores.
    boxes: tensor of box coordinates, aligned with `scores`.
    classes: tensor of per-box class indices, aligned with `scores`.
    max_boxes: maximum number of boxes to keep.
    iou_threshold: overlap threshold passed to tf.image.non_max_suppression.

  Returns:
    A tuple (scores, boxes, classes) gathered at the indices selected by
    non-max suppression.
  """
  max_boxes_tensor = K.variable(max_boxes, dtype='int32')
  K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
  # The original built and initialized this tensor but then passed the plain
  # Python value to the op; use the tensor that was prepared for it.
  nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
  scores = K.gather(scores, nms_indices)
  boxes = K.gather(boxes, nms_indices)
  classes = K.gather(classes, nms_indices)
  return scores, boxes, classes
  

In [0]:
def yolo_eval(yolo_outputs, image_shape = (720., 1280), max_boxes = 10, score_threshold = .6, iou_threshold = .5):
  """Turn raw YOLO head outputs into final scores, boxes and classes.

  Pipeline: convert boxes to corner form, drop low-score boxes, scale the
  boxes by `image_shape`, then apply non-max suppression.

  Args:
    yolo_outputs: 4-tuple unpacked as (box_confidence, box_xy, box_wh,
      box_class_probs).
    image_shape: (height, width) used to scale the boxes.
    max_boxes: maximum number of boxes kept by non-max suppression.
    score_threshold: minimum class score for a box to survive filtering.
    iou_threshold: overlap threshold used by non-max suppression.

  Returns:
    A tuple (scores, boxes, classes) of the detections left after non-max
    suppression.
  """
  # NOTE(review): the tuple order must match what yolo_head returns -- confirm
  # against the yad2k version in use.
  box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
  # NOTE(review): yolo_boxes_to_corners is not defined or imported in the
  # visible cells; presumably it comes from yad2k -- confirm the import.
  boxes = yolo_boxes_to_corners(box_xy, box_wh)
  # Filter on the score threshold (the original passed iou_threshold here).
  scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = score_threshold)
  boxes = scale_boxes(boxes, image_shape)
  # Keyword arguments keep max_boxes and iou_threshold in the right slots; the
  # original passed iou_threshold positionally as max_boxes and then returned
  # the pre-suppression scores.
  scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes = max_boxes, iou_threshold = iou_threshold)
  return scores, boxes, classes
  

In [0]:
# Grab the Keras backend session; `predict` later runs the detection tensors in it.
sess = K.get_session()
# Class labels and anchor sizes are read from text files under model_data/.
# NOTE(review): the first cell clones the repository into `yad2k/`, so confirm
# these relative paths resolve from the notebook's working directory.
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
# (height, width) that detected boxes are scaled to; passed to yolo_eval.
image_shape = (720., 1280.)

In [44]:
# Load the Keras model from an .h5 file.
# NOTE(review): the saved output of this cell is a NameError, which suggests
# the import cell had not been run in that session -- re-run from a fresh kernel.
yolo_model = load_model("model_data/yolo.h5")

NameError: ignored

In [0]:
# Print the layer-by-layer summary of the loaded model.
yolo_model.summary()

In [0]:
# Decode the raw model output into box tensors. The result is named
# `yolo_outputs` because that is the name the yolo_eval cell reads, and the
# class count comes from `class_names` (`classes` is not defined yet here).
# NOTE(review): yolo_head is not defined or imported in the visible cells;
# presumably it comes from yad2k -- confirm the import.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

In [0]:
# Build the graph tensors for the final detections; `predict` evaluates them
# in the session for each image.
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)

In [0]:
def predict(sess, image_file):
  """Run detection on one image, save the annotated copy and display it.

  Reads `images/<image_file>`, evaluates the module-level `scores`, `boxes`
  and `classes` tensors in `sess`, draws the detections on the image and
  writes the result to `out/<image_file>`.

  Args:
    sess: session in which the detection tensors are evaluated.
    image_file: file name of the image inside the `images` directory.

  Returns:
    A tuple (out_scores, out_boxes, out_classes) with the evaluated
    detections.
  """
  image, image_data = preprocess_image(os.path.join("images", image_file), model_image_size = (608, 608))
  # learning_phase 0 runs the model in inference mode.
  out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict = {yolo_model.input: image_data, K.learning_phase(): 0})
  print('Found {} boxes for {}'.format(len(out_boxes), image_file))
  colors = generate_colors(class_names)
  draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
  # Make sure the output directory exists before saving into it.
  os.makedirs("out", exist_ok = True)
  output_path = os.path.join("out", image_file)
  image.save(output_path, quality = 90)
  # scipy.misc.imread was removed from SciPy; read the saved file back with
  # matplotlib instead.
  output_image = plt.imread(output_path)
  imshow(output_image)
  return out_scores, out_boxes, out_classes

In [0]:
# Run the full pipeline on one image.
# NOTE(review): predict reads from images/<file>, while the first cell saves
# `0001.jpg` into the working directory -- confirm images/test.jpg exists.
out_scores, out_boxes, out_classes = predict(sess, "test.jpg")