In [0]:
import cv2
import numpy as np
import matplotlib.pyplot as plt


In [0]:
class BoundBox:
  '''
  Axis-aligned rectangle described by two opposite corners, with an
  optional confidence score attached.
  attributes -
      xmin, ymin - coordinates of the minimum corner
      xmax, ymax - coordinates of the maximum corner
      confidence - score associated with the box (None when not supplied)
  '''
  def __init__(self, xmin, ymin, xmax, ymax, confidence=None):
    # values are stored exactly as given; no validation or conversion happens here
    self.xmin, self.ymin = xmin, ymin
    self.xmax, self.ymax = xmax, ymax
    self.confidence = confidence

In [0]:
def intersection_area(box1, box2):
  '''
  Area of the region shared by two axis-aligned rectangles.
  inputs -
      box1, box2 - objects exposing xmin, ymin, xmax, ymax (e.g. BoundBox)
  outputs -
      area - width * height of the overlap; 0 when the rectangles are
             disjoint or only touch along an edge / at a corner
  '''
  # the overlap starts at the larger of the two minimum corners ...
  left = max(box1.xmin, box2.xmin)
  top = max(box1.ymin, box2.ymin)
  # ... and ends at the smaller of the two maximum corners
  right = min(box1.xmax, box2.xmax)
  bottom = min(box1.ymax, box2.ymax)

  # a negative extent means there is no overlap along that axis, so clamp to 0
  overlap_w = max(0, right - left)
  overlap_h = max(0, bottom - top)

  return overlap_w * overlap_h


In [0]:
def box_iou(box1, box2):
  '''
  Computes Intersection Over Union of two bounding boxes
  inputs -
      box1, box2 - objects of class BoundBox, have attributes xmin, ymin, xmax, ymax
  outputs -
      IOU - (float between 0 and 1 for well-formed boxes);
            0.0 when the union area is not positive (e.g. both boxes have zero area)
  '''
  area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin)
  area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin)

  area_intersection = intersection_area(box1, box2)

  # union = sum of both areas minus the part that was counted twice
  area_union = area1 + area2 - area_intersection

  # Degenerate boxes can make the union zero; dividing would raise
  # ZeroDivisionError for Python numbers or yield NaN for numpy scalars.
  # Such boxes share no measurable area, so report no overlap instead.
  if area_union <= 0:
    return 0.0

  return area_intersection / area_union

In [0]:
def decode(output, anchors, obj_threshold = 0.5, nms_threshold = 0.5):
  '''
  decodes output from model and outputs best bounding boxes
  inputs -
      output - output of model, shape - (grid_h, grid_w, nb_box, 5);
               the last axis is read as (x, y, w, h, confidence) raw values.
               The array is NOT modified.
      anchors - flat sequence of anchor sizes laid out as
                [w_0, h_0, w_1, h_1, ...], one (w, h) pair per box index
      obj threshold - boxes whose sigmoid confidence is not above this are dropped
      nms_threshold - for nonmax_suppression
  outputs -
      bestboxes - filtered bounding boxes (BoundBox objects, corner format,
                  coordinates divided by the grid size)
  '''
  grid_h, grid_w, nb_box = output.shape[:3]

  # Squash confidences into a separate array. Writing them back into `output`
  # would alter the caller's data and apply the sigmoid twice on a second call.
  confidences = _sigmoid(output[..., 4])

  boxes = []  # candidate boxes that pass the object threshold
  for row in range(grid_h):
    for col in range(grid_w):
      for b in range(nb_box):
        confidence = confidences[row, col, b]
        # apply object threshold before doing any further work on this cell
        if not confidence > obj_threshold:
          continue

        tx, ty, tw, th = output[row, col, b, :4]

        # apply the decoding functions
        x = (col + _sigmoid(tx)) / grid_w             # centre x, normalized wrt IMAGE_W
        y = (row + _sigmoid(ty)) / grid_h             # centre y, normalized wrt IMAGE_H (uses ty, not x)
        w = anchors[2 * b + 0] * np.exp(tw) / grid_w  # normalized wrt IMAGE_W
        h = anchors[2 * b + 1] * np.exp(th) / grid_h  # normalized wrt IMAGE_H

        # save to xmin, ymin, xmax, ymax format
        boxes.append(BoundBox(x - w/2, y - h/2, x + w/2, y + h/2, confidence))

  # apply non max suppression
  return nonmax_suppression(boxes, nms_threshold)

In [0]:
def nonmax_suppression(boxes, nms_threshold = 0.5):
  '''
  Greedy non-maximum suppression over a list of scored boxes.
  inputs -
      boxes - list of box objects exposing a `confidence` attribute
      nms_threshold - a remaining box is discarded when its IoU with the
                      box just selected is strictly greater than this value
  outputs -
      bestboxes - the surviving boxes, highest confidence first
  '''
  confidences = np.array([candidate.confidence for candidate in boxes])
  # ascending order of confidence, so pop() always yields the best remaining index
  remaining = confidences.argsort().tolist()
  kept = []

  while remaining:
    best = remaining.pop()
    kept.append(best)

    if not remaining:
      break

    # IoU of the selected box against every box still in play
    overlaps = np.array([box_iou(boxes[best], boxes[other]) for other in remaining])
    # positions (within `remaining`) of the boxes that overlap too much
    suppressed = set((overlaps > nms_threshold).nonzero()[0])

    remaining = [other for position, other in enumerate(remaining) if position not in suppressed]

  return [boxes[k] for k in kept]

In [0]:
def box_draw(image, boxes):
  '''
  Draws every box as a green rectangle and shows the result with matplotlib.
  inputs -
      image - array whose shape unpacks as (height, width, channels)
      boxes - iterable of objects with xmin, ymin, xmax, ymax; the values are
              multiplied by the image width / height, i.e. treated as fractions
              of the image size
  outputs -
      None (the figure is displayed)
  NOTE: no copy of `image` is taken before drawing, so the rectangles are
  expected to end up on the caller's array as well.
  '''
  height, width, _ = image.shape
  canvas = image
  for box in boxes:
    # scale the relative corners to integer pixel positions
    top_left = (int(box.xmin * width), int(box.ymin * height))
    bottom_right = (int(box.xmax * width), int(box.ymax * height))
    canvas = cv2.rectangle(canvas, top_left, bottom_right, color = (0,255,0), thickness = 2)
  plt.imshow(canvas)
  plt.show()

In [0]:
def compute_overlap(a, b):
  """
  Pairwise IoU between two sets of boxes.
  Code originally from https://github.com/rbgirshick/py-faster-rcnn.
  Parameters
  ----------
  a: (N, 4) ndarray of float, columns read as (x1, y1, x2, y2)
  b: (K, 4) ndarray of float, columns read as (x1, y1, x2, y2)
  Returns
  -------
  overlaps: (N, K) ndarray, entry [i, j] is the IoU of a[i] with b[j]
  """
  # keep the columns of `a` as (N, 1) so they broadcast against the (K,) columns of `b`
  a_x1, a_y1, a_x2, a_y2 = a[:, 0:1], a[:, 1:2], a[:, 2:3], a[:, 3:4]
  b_x1, b_y1, b_x2, b_y2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3]

  b_area = (b_x2 - b_x1) * (b_y2 - b_y1)
  a_area = (a_x2 - a_x1) * (a_y2 - a_y1)

  # overlap extent along each axis, clamped at 0 for disjoint pairs
  inter_w = np.maximum(np.minimum(a_x2, b_x2) - np.maximum(a_x1, b_x1), 0)
  inter_h = np.maximum(np.minimum(a_y2, b_y2) - np.maximum(a_y1, b_y1), 0)
  inter = inter_w * inter_h

  union = a_area + b_area - inter
  # floor the union at machine epsilon so the division never hits zero
  union = np.maximum(union, np.finfo(float).eps)

  return inter / union

In [0]:
def _sigmoid(x):
  return 1./(1. + np.exp(-x))

In [0]:
def checkiou():
  '''
  Prints box_iou for four hand-made box pairs: overlapping, disjoint,
  touching at a corner and touching along an edge.
  '''
  cases = [
    # Test case 1: boxes intersect
    ("iou for intersecting boxes = ", BoundBox(2, 1, 4, 3), BoundBox(1, 2, 3, 4)),
    # Test case 2: boxes do not intersect
    ("iou for non-intersecting boxes = ", BoundBox(1,2,3,4), BoundBox(5,6,7,8)),
    # Test case 3: boxes intersect at vertices only
    ("iou for boxes that only touch at vertices = ", BoundBox(1,1,2,2), BoundBox(2,2,3,3)),
    # Test case 4: boxes intersect at edge only
    ("iou for boxes that only touch at edges = ", BoundBox(1,1,3,3), BoundBox(2,3,3,4)),
  ]
  final = len(cases) - 1
  for position, (label, first, second) in enumerate(cases):
    message = label + str(box_iou(first, second))
    if position == final:
      # the last report is followed by an extra newline argument
      print(message, '\n')
    else:
      print(message)

In [0]:
def checknms():
  '''
  Runs nonmax_suppression (default threshold) on four nested squares that
  share the origin corner and prints the confidence of each surviving box.
  '''
  # (side length, confidence) of each square anchored at (0, 0)
  specs = [(1, 0.8), (2, 0.7), (3, 0.6), (4, 0.5)]
  candidates = [BoundBox(0, 0, side, side, score) for side, score in specs]
  for survivor in nonmax_suppression(candidates):
    print(survivor.confidence)

def checkdecode_overlap():
  '''
  Smoke test for decode and compute_overlap on random data: decodes a random
  (7, 7, 5, 5) tensor, prints each surviving confidence and the box count,
  then prints the overlap matrix against three random query rows.
  The random generator is not seeded here, so the printed values vary per run.
  '''
  raw_output = np.random.randn(7,7,5,5)
  anchor_sizes = [1,1,2,2,3,3,4,4,5,5]
  detections = decode(raw_output, anchor_sizes, 0.6,0.3)

  corners = []
  for detection in detections:
    print(detection.confidence)
    corners.append([detection.xmin, detection.ymin, detection.xmax, detection.ymax])
  # explicit reshape keeps a (0, 4) shape when nothing was detected
  corners = np.array(corners).reshape((len(corners), 4))

  print("\nNumber of predicted boxes = ", len(detections), '\n')
  # the query rows are raw gaussian samples, not necessarily well-formed boxes
  queries = np.random.randn(3,4)
  print(compute_overlap(corners, queries), '\n')


In [0]:
if __name__ =="__main__":
  # run the print-based self checks one after another, in this order
  for check in (checkiou, checknms, checkdecode_overlap):
    check()


iou for intersecting boxes = 0.14285714285714285
iou for non-intersecting boxes = 0.0
iou for boxes that only touch at vertices = 0.0
iou for boxes that only touch at edges = 0.0 

0.8
0.7
0.6
0.9562772356902175
0.9103324824514001
0.8937271942224178
0.8854682283851176
0.8627148728924207
0.8619930092146256
0.8558725541100238
0.8358134007434488
0.8255393269754264
0.8186077971455114
0.8135730574654882
0.8126515703297339
0.811142854404768
0.803845945459782
0.7999040179574621
0.7979060516974087
0.7922112948407793
0.7811946829272767
0.7800875679655584
0.7701204522987396
0.7673013760229938
0.7671992086730854
0.7626558052519475
0.758547270718583
0.7573024217401197
0.7507192189594432
0.7439879617033714
0.7298343352241475
0.7282012268326256
0.7209691751949878
0.7025805300414539
0.70018088313481
0.6983321827045919
0.6852404164154653
0.681206225862029
0.6789138714641968
0.6747285421199997
0.6654030932780045
0.6636111075169981
0.6542085810807828
0.6532487930226439
0.6482490417005516
0.6421406066854