## Load necessary modules

In [None]:
# show images inline
%matplotlib inline

# automatically reload modules when they have changed
# %load_ext autoreload
# %autoreload 2

# import keras
import keras

# import keras_retinanet
from keras_retinanet.models.resnet import custom_objects
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color
import keras.backend as K
# import miscellaneous modules
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time

# set tf backend to allow memory to grow, instead of claiming everything
import tensorflow as tf

def get_session():
    """Build a TensorFlow session that lets GPU memory grow on demand.

    Returns:
        A ``tf.Session`` whose configuration has
        ``gpu_options.allow_growth`` switched on, so the process does not
        claim all GPU memory up front.
    """
    growth_options = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(gpu_options=growth_options)
    return tf.Session(config=session_config)

# Optional: uncomment to restrict TensorFlow to a single GPU (here device "1").
# This must be set before the session below is created to take effect.
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Register the session built by get_session() (GPU memory growth enabled)
# as the session used by the Keras TensorFlow backend.
keras.backend.tensorflow_backend.set_session(get_session())

## Load RetinaNet model

In [None]:
# Path to the RetinaNet weights file; adjust this to point to your
# downloaded/trained model.
model_path = 'resnet50_coco_best_v2.0.2.h5'

# Load the RetinaNet model; custom_objects supplies the keras_retinanet
# classes Keras needs in order to deserialize the file.
retinanet = keras.models.load_model(model_path, custom_objects=custom_objects)
# retinanet.summary()

# Mapping from integer class label (0-79) to a human-readable name,
# used only for captions when visualizing detections.
labels_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}

In [None]:
# Symbolic image input for the wrapper model built below. The two leading
# per-sample dimensions are left unspecified (None) and the last one is 3;
# presumably height x width x colour channels -- confirm against the detector.
img_input = keras.layers.Input(shape=(None,None,3),name='img_input')
# Further inputs sketched for later use; currently disabled.
# scale_input = keras.layers.Input(shape=(1,),name='scale_input')
# human_bbox_input = keras.layers.Input(shape=(None,None,4),name='human_bbox_input')
# object_bbox_input = keras.layers.Input(shape=(None,None,4),name='object_bbox_input')

In [None]:
# retinanet.summary()

In [None]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Apply non-max suppression (NMS) to a set of already score-filtered boxes.

    Arguments:
    scores -- 1-D tensor with one confidence score per candidate box
    boxes -- 2-D tensor of shape (num_boxes, 4) with the box coordinates
    classes -- 1-D tensor with the predicted class index of each candidate box
    max_boxes -- integer, maximum number of boxes to keep
    iou_threshold -- real value, "intersection over union" threshold above which
                     a box is suppressed by a higher-scoring overlapping box

    Returns:
    scores -- scores of the boxes that survive NMS
    boxes -- coordinates of the surviving boxes, one row of 4 values per box
    classes -- class indices of the surviving boxes

    Note: at most max_boxes boxes are returned. The three outputs are gathered
    with the same indices, so entry i of each output refers to the same box.
    """

    # max_boxes is handed to TensorFlow directly. No backend variable is created
    # here: this function runs inside a Keras Lambda layer, where a variable
    # created (and initialised through the session) on every call would only
    # add an unused, untracked node to the graph.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_output_size=max_boxes, iou_threshold=iou_threshold)

    # Select the surviving entries from all three tensors with the same indices.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes

In [None]:
def non_max_suppression(ip):
    """Filter raw detector outputs by score, then apply non-max suppression.

    Arguments:
    ip -- two-element sequence ``[bbox, classification]``. ``classification``
          is reduced over axis 2; assumes it is laid out as
          (batch, boxes, classes) and ``bbox`` as (batch, boxes, 4) -- TODO confirm.

    Returns:
    A list ``[scores, boxes, classes]`` holding the detections whose best class
    score is at least 0.6 and that survive NMS (at most 10 boxes, IoU
    threshold 0.5).
    """
    bbox, classification = ip[0], ip[1]

    # Best class score per box and the class index that achieves it.
    best_score = K.max(classification, axis=2)
    best_label = K.argmax(classification, axis=2)

    # Keep only boxes whose best score reaches the confidence threshold.
    keep = best_score >= 0.6
    kept_scores, kept_boxes, kept_labels = [
        tf.boolean_mask(tensor, keep) for tensor in (best_score, bbox, best_label)
    ]

    suppressed = yolo_non_max_suppression(
        kept_scores, kept_boxes, kept_labels, max_boxes=10, iou_threshold=0.5)
    return list(suppressed)

In [None]:
# retinanet.summary()

In [None]:
# Apply the loaded detector to the symbolic image input. It yields four
# outputs; only the last two (boxes and per-class scores) are used here.
_,_,bbox,classification=retinanet(img_input)

# classes = keras.layers.Lambda(non_max_suppression)([boxes, nms_classification])
# Wrap score filtering + NMS in a Lambda layer so it becomes part of the model graph.
scores, boxes, classes= keras.layers.Lambda(non_max_suppression)([bbox, classification])
# TODO (original note: "need resize"): the boxes are not rescaled at this point.


In [None]:
# End-to-end model: image in, NMS-filtered scores / boxes / classes out.
model_all = keras.Model(inputs=img_input,outputs=[scores, boxes, classes])


In [None]:
# Print the layer summary of the wrapper model (detector + Lambda NMS layer).
model_all.summary()

In [None]:
# load the example image
image = read_image_bgr('/projectdata/cht01/hico_20160224_det/images/train2015/HICO_train2015_00000001.jpg')

# copy to draw on
draw = image.copy()
draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

# preprocess image for network; `scale` is the resize factor that was applied
image = preprocess_image(image)
image, scale = resize_image(image)

# process image (a batch containing just this one image) and time the call
start = time.time()
scores, boxes, classes = model_all.predict_on_batch(np.expand_dims(image, axis=0))
print("processing time: ", time.time() - start)

# correct for image scale: the network saw the resized image, so map the
# predicted boxes back to the coordinates of the original image
boxes = boxes / scale


In [None]:
# Reload the image and reverse its last axis so matplotlib shows the colours
# correctly (read_image_bgr presumably returns BGR channel order; imshow expects RGB).
image = read_image_bgr('/projectdata/cht01/hico_20160224_det/images/train2015/HICO_train2015_00000001.jpg')[:,:,::-1]
plt.imshow(image)

In [None]:
# Scores of the detections returned by model_all.
scores

In [None]:
# Class indices of the detections returned by model_all (see labels_to_names).
classes

In [None]:
# Box coordinates of the detections returned by model_all.
boxes

In [None]:
# Sanity check: the three outputs should describe the same number of detections.
scores.shape,classes.shape,boxes.shape

## Run detection on example

In [None]:
# read the example image from disk
image = read_image_bgr('/projectdata/cht01/hico_20160224_det/images/train2015/HICO_train2015_00000010.jpg')

# keep an RGB copy of the untouched image to draw the detections on
draw = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)

# preprocess and resize the image for the network; `scale` is the resize factor
image, scale = resize_image(preprocess_image(image))

# run the detector on a batch holding just this image and time the call
start = time.time()
_, _, boxes, nms_classification = retinanet.predict_on_batch(image[np.newaxis, ...])
print("processing time: ", time.time() - start)

# best class and its score for every candidate box of the single image
class_scores = nms_classification[0]
predicted_labels = class_scores.argmax(axis=1)
scores = class_scores[np.arange(class_scores.shape[0]), predicted_labels]

# map the boxes back to the coordinates of the original image
boxes /= scale

# draw every detection that is not below the 0.5 score threshold
for idx in np.flatnonzero(~(scores < 0.5)):
    label, score = predicted_labels[idx], scores[idx]
    print(idx)

    b = boxes[0, idx, :].astype(int)
    draw_box(draw, b, color=label_color(label))
    draw_caption(draw, b, "{} {:.3f}".format(labels_to_names[label], score))

# show the annotated image without axes
plt.figure(figsize=(15, 15))
plt.axis('off')
plt.imshow(draw)
plt.show()