In [164]:
import numpy as np
import tensorflow as tf
from keras import backend as kb

def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.6):
    """
    Arguments:
    box_confidence -- tensor of shape (S, S, B, 1)
    boxes -- tensor of shape (S, S, B, 4)
    box_class_probs -- tensor of shape (S, S, B, C)
    threshold -- real value, if [ highest class probability score < threshold], then get rid of the corresponding box

    S - number of grid, B - number of anchor boxes, C - classes

    Returns:
    scores -- tensor of shape (None,), containing the class probability score for selected boxes
    boxes -- tensor of shape (None, 4), containing (b_x, b_y, b_h, b_w) coordinates of selected boxes
    classes -- tensor of shape (None,), containing the index of the class detected by the selected boxes

    Note: "None" is here because you don't know the exact number of selected boxes, as it depends on the threshold. 
    For example, the actual output size of scores would be (10,) if there are 10 boxes.    
    """

    box_scores = tf.multiply(box_confidence, box_class_probs)    # confidence broadcasted
    box_max_classes = tf.argmax(box_scores, axis=-1)
    box_max_scores = kb.max(box_scores, axis=-1)
    
    mask = box_max_scores >= threshold
    
    scores = tf.boolean_mask(box_max_scores, mask)
    boxes = tf.boolean_mask(boxes, mask)
    classes = tf.boolean_mask(box_max_classes, mask)
    
    return scores, boxes, classes
    
    
def iou(box1, box2):
    """
    Arguments:
    box1 -- first box, list object with coordinates (x1, y1, x2, y2)
    box2 -- second box, list object with coordinates (x1, y1, x2, y2)
    """        
    ix = tf.subtract(tf.minimum(box1[2], box2[2]), tf.maximum(box1[0], box2[0]))
    iy = tf.subtract(tf.minimum(box1[3], box2[3]), tf.maximum(box1[1], box2[1]))
    
    intersect = tf.multiply(tf.maximum(ix, 0), tf.maximum(iy, 0))
    b1 = tf.multiply(tf.subtract(box1[2], box1[0]), tf.subtract(box1[3], box1[1]))
    b2 = tf.multiply(tf.subtract(box2[2], box2[0]), tf.subtract(box2[3], box2[1]))
    union = tf.subtract(tf.add(b1, b2), intersect)
    
    return tf.divide(intersect, union)
    
    
def non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Applies Non-max suppression (NMS) to set of boxes
    
    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering
    
    Returns:
    scores -- tensor of shape (, None), predicted score for each box
    boxes -- tensor of shape (4, None), predicted box coordinates
    classes -- tensor of shape (, None), predicted class for each box
    
    Note: The "None" dimension of the output tensors has obviously to be less than max_boxes. Note also that this
    function will transpose the shapes of scores, boxes, classes. This is made for convenience.
    """
    
    nms_indices = np.zeros(max_boxes, dtype=int)
    tmp = scores
    for i in range(max_boxes):
        maxidx = tf.argmax(tmp)
        mask = tf.map_fn(lambda box: tf.less(iou(boxes[maxidx], box), iou_threshold), boxes, dtype=tf.bool)

        tmp = tf.boolean_mask(tmp, mask)
        nms_indices[i] = maxidx.eval()
    
    scores = kb.gather(scores, nms_indices)
    boxes = kb.gather(boxes, nms_indices)
    classes = kb.gather(classes, nms_indices)
    
    return scores, boxes, classes

In [166]:
with tf.Session() as test_b:
    scores = tf.random_normal([54,], mean=1, stddev=4, seed = 1)
    boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed = 1)
    classes = tf.random_normal([54,], mean=1, stddev=4, seed = 1)
    scores, boxes, classes = non_max_suppression(scores, boxes, classes, max_boxes=10)
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))


[12  3  6 49 33 40 37 40 27 23]
scores[2] = 6.0194
boxes[2] = [  6.41169071   2.62523627   3.64737511  10.97169209]
classes[2] = 3.37129
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)
[ -0.99814534   5.00565434  -1.99267793  -3.4164257   -1.0503962
   6.51658058  -0.27600873   6.51658058 -10.05641556   6.34522581]


In [108]:
S = 19
with tf.Session() as test_a:
    box_confidence = tf.random_normal([S, S, 5, 1], mean=1, stddev=4, seed = 1)
    boxes = tf.random_normal([S, S, 5, 4], mean=1, stddev=4, seed = 1)
    box_class_probs = tf.random_normal([S, S, 5, 80], mean=1, stddev=4, seed = 1)
    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.5)
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.shape))
    print("boxes.shape = " + str(boxes.shape))
    print("classes.shape = " + str(classes.shape))

   # scores, boxes, classes = non_max_suppression(scores, boxes, classes, 10)
    
    box1 = (2, 1, 4, 3)
    box2 = (1, 2, 3, 4) 
    print("iou = " + str(iou(box1, box2).eval()))

scores[2] = 10.7506
boxes[2] = [ 8.42653275  3.27136683 -0.53134358 -4.94137335]
classes[2] = 7
scores.shape = (?,)
boxes.shape = (?, 4)
classes.shape = (?,)
iou = 0.142857142857


In [103]:
def dist(a, b):
    return tf.add(tf.multiply(a[0], b[0]), tf.multiply(a[1], b[1]))

with tf.Session() as test_a:
    a = np.array([[1, 1], [2, 2]])
    aa = np.array([1, 4])
    #a = np.array([1, 1, 2, 2])
    b = tf.map_fn(lambda x: tf.equal(dist(x, aa), 5), a, dtype=tf.bool)
    print(b.eval())

[ True False]
