In [49]:
import numpy as np
import tensorflow as tf
from keras import backend as kb

def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.6):
    """
    Arguments:
    box_confidence -- tensor of shape (S, S, B, 1)
    boxes -- tensor of shape (S, S, B, 4)
    box_class_probs -- tensor of shape (S, S, B, C)
    threshold -- real value, if [ highest class probability score < threshold], then get rid of the corresponding box

    S - number of grid, B - number of anchor boxes, C - classes

    Returns:
    scores -- tensor of shape (None,), containing the class probability score for selected boxes
    boxes -- tensor of shape (None, 4), containing (b_x, b_y, b_h, b_w) coordinates of selected boxes
    classes -- tensor of shape (None,), containing the index of the class detected by the selected boxes

    Note: "None" is here because you don't know the exact number of selected boxes, as it depends on the threshold. 
    For example, the actual output size of scores would be (10,) if there are 10 boxes.    
    """

    box_scores = tf.multiply(box_confidence, box_class_probs)    # confidence broadcasted
    box_max_classes = tf.argmax(box_scores, axis=-1)
    box_max_scores = kb.max(box_scores, axis=-1)
    
    mask = box_max_scores >= threshold
    
    scores = tf.boolean_mask(box_max_scores, mask)
    boxes = tf.boolean_mask(boxes, mask)
    classes = tf.boolean_mask(box_max_classes, mask)
    
    return scores, boxes, classes
    
    
def iou(pivot_box, boxes):
    """
    Arguments:
    pivot_box -- one box, tensor of shape (4) (x1, y1, x2, y2)
    boxes -- comparing boxes, tensor of shape (None, 4) (x1, y1, x2, y2)
    
    Returns:
    ious -- tensor of shape (None, 1), iou values 
    """        
    px1 = tf.minimum(pivot_box[0], pivot_box[2])
    px2 = tf.maximum(pivot_box[0], pivot_box[2])
    py1 = tf.minimum(pivot_box[1], pivot_box[3])
    py2 = tf.maximum(pivot_box[1], pivot_box[3])
    
    bx1 = tf.minimum(boxes[:, 0], boxes[:, 2])
    bx2 = tf.maximum(boxes[:, 0], boxes[:, 2])
    by1 = tf.minimum(boxes[:, 1], boxes[:, 3])
    by2 = tf.maximum(boxes[:, 1], boxes[:, 3])
    
    ix = tf.subtract(tf.minimum(px2, bx2), tf.maximum(px1, bx1))
    iy = tf.subtract(tf.minimum(py2, by2), tf.maximum(py1, by1))
    
    intersect = tf.maximum(tf.multiply(ix, iy), 0.)
    b1 = tf.multiply(tf.subtract(px2, px1), tf.subtract(py2, py1))
    b2 = tf.multiply(tf.subtract(bx2, bx1), tf.subtract(by2, by1))
    union = tf.subtract(tf.add(b1, b2), intersect)
    
    return tf.divide(intersect, union)
    
    
def non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Applies Non-max suppression (NMS) to set of boxes
    
    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering
    
    Returns:
    scores -- tensor of shape (None,), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box
    
    Note: The "None" dimension of the output tensors has obviously to be less than max_boxes. 
    """
    
    nms_indices = tf.constant([], dtype=tf.int64)
    z_score = tf.zeros_like(scores)
    z_box = tf.zeros_like(boxes)
    
    def cond(tscores, tboxes, indices):
        return tf.logical_not(tf.reduce_all(tf.equal(tscores, z_score)))
    
    def body(tscores, tboxes, indices):
        # find the box with maximum score
        maxidx = tf.argmax(tscores)
        indices = tf.concat([indices, [maxidx]], axis=-1)
        
        # mark the rests that have less scores than iou_threshold
        iou_mat = iou(tboxes[maxidx], tboxes)
        mask = tf.less_equal(iou_mat, iou_threshold)

        tscores = tf.where(mask, tscores, z_score)
        tboxes = tf.where(mask, tboxes, z_box)
        
        return [tscores, tboxes, indices]
 
    final = tf.while_loop(cond, body, loop_vars=[scores, boxes, nms_indices],\
                          shape_invariants=[scores.get_shape(), boxes.get_shape(), tf.TensorShape([None, ])], \
                          maximum_iterations=max_boxes)
                
    scores = kb.gather(scores, final[2])
    boxes = kb.gather(boxes, final[2])
    classes = kb.gather(classes, final[2])
    
    return scores, boxes, classes

In [50]:

def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Applies Non-max suppression (NMS) to set of boxes
    
    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering
    
    Returns:
    scores -- tensor of shape (, None), predicted score for each box
    boxes -- tensor of shape (4, None), predicted box coordinates
    classes -- tensor of shape (, None), predicted class for each box
    
    Note: The "None" dimension of the output tensors has obviously to be less than max_boxes. Note also that this
    function will transpose the shapes of scores, boxes, classes. This is made for convenience.
    """
    
    max_boxes_tensor = kb.variable(max_boxes, dtype='int32')     # tensor to be used in tf.image.non_max_suppression()
    kb.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor
    
    # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep
    ### START CODE HERE ### (≈ 1 line)
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold)
    ### END CODE HERE ###
    
    print(nms_indices.eval())
    
    # Use K.gather() to select only nms_indices from scores, boxes and classes
    ### START CODE HERE ### (≈ 3 lines)
    scores = kb.gather(scores, nms_indices)
    boxes = kb.gather(boxes, nms_indices)
    classes = kb.gather(classes, nms_indices)
    ### END CODE HERE ###
    
    return scores, boxes, classes

In [51]:
with tf.Session() as sess:
    
    scores = tf.random_normal([54,], mean=1, stddev=4, seed = 1)
    boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed = 1)
    classes = tf.random_normal([54,], mean=1, stddev=4, seed = 1)
    """
    scores = tf.constant([1, 2, 3, 4, 5, 1], dtype=tf.float32)
    boxes = tf.constant([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 2, 1], [4, 4, 5, 6], [4, 4, 5, 5], [6, 6, 7, 10]], dtype=tf.float32)
    classes = tf.constant([1, 1, 1, 1, 1, 2], dtype=tf.float32)
    #sess.run(tf.global_variables_initializer())
    """
    
    scores, boxes, classes = non_max_suppression(scores, boxes, classes, max_boxes=10)

    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))

    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))

scores[2] = 6.9384
boxes[2] = [-5.299932    3.13798141  4.45036697  0.95942086]
classes[2] = -2.24527
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)
