In [0]:
# Generate new anchor boxes for 416x416 input images
import numpy as np
import random
import os

In [2]:
# Mount Google Drive at /content/drive (Colab only). This is interactive: the
# cell output below shows the authorization prompt that must be completed.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
import pickle
# Data folder on the mounted drive. print_anchors also reads this global when
# it saves the anchors file.
path = '/content/drive/My Drive/SVT'

# Load the pickled list of training annotations.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load files from a trusted source.
with open(os.path.join(path, 'train_images_.pickle'), 'rb') as handle:
  train_images = pickle.load(handle)
# Show the first record; per the output below it is a dict with 'filename',
# 'lex', 'height', 'width' and an 'object' list of labelled boxes
# (xmin/ymin/xmax/ymax/label).
print(train_images[0])

{'filename': '/content/drive/My Drive/SVT/img/14_03.jpg', 'lex': 'LIVING,ROOM,THEATERS,KENNY,ZUKE,DELICATESSEN,CLYDE,COMMON,ACE,HOTEL,PORTLAND,ROSE,CITY,BOOKS,STUMPTOWN,COFFEE,ROASTERS,RED,CAP,GARAGE,FISH,GROTTO,SEAFOOD,RESTAURANT,AURA,RESTAURANT,LOUNGE,ROCCO,PIZZA,PASTA,BUFFALO,EXCHANGE,MARK,SPENCER,LIGHT,FEZ,BALLROOM,READING,FRENZY,ROXY,SCANDALS,MARTINOTTI,CAFE,DELI,CROWSENBERG,HALF', 'height': 880, 'width': 1280, 'object': [{'xmin': 375, 'ymin': 253, 'xmax': 611, 'ymax': 328, 'label': 'LIVING'}, {'xmin': 639, 'ymin': 272, 'xmax': 814, 'ymax': 348, 'label': 'ROOM'}, {'xmin': 839, 'ymin': 283, 'xmax': 1120, 'ymax': 370, 'label': 'THEATERS'}]}


In [0]:
# Define the parameters, global variables to be used 
def initialize():
  '''
  builds the dict of parameters shared by the rest of the notebook
  outputs -
    dict with image size, grid cell size, grid cell counts,
    true-box buffer size, loss weights and batch size
  '''
  # (height, width) pairs for the image and for a single grid cell
  image_h, image_w = 416, 416
  cell_h, cell_w = 32, 32

  settings = {}
  settings['IMAGE_W'] = image_w
  settings['IMAGE_H'] = image_h
  settings['GRID_WSIZE'] = cell_w
  settings['GRID_HSIZE'] = cell_h
  # number of grid cells along each axis (integer division)
  settings['GRID_W'] = image_w // cell_w
  settings['GRID_H'] = image_h // cell_h

  settings['TRUE_BOX_BUFFER'] = 50
  settings['LAMBDA_COORD'] = 1
  settings['LAMBDA_NO_OBJECT'] = 1
  settings['LAMBDA_OBJECT'] = 5
  settings['BATCH_SIZE'] = 6
  return settings

# module-level configuration read by datagen
config = initialize()

In [0]:
def IOU(ann, centroids):
  '''
  intersection-over-union of one box against every anchor, with the box and
  the anchor treated as sharing a corner (only widths and heights matter)
  inputs -
    ann - (w, h) of a single annotation box
    centroids - iterable of (c_w, c_h) anchors
  outputs -
    np array with one iou value per anchor, in the order of centroids
  '''
  box_w, box_h = ann

  def _overlap(anchor):
    a_w, a_h = anchor
    anchor_is_wider = a_w >= box_w

    if anchor_is_wider and a_h >= box_h:
      # anchor fully contains the box
      return box_w*box_h/(a_w*a_h)
    if anchor_is_wider and a_h <= box_h:
      # anchor is wider but shorter: overlap is box_w x a_h
      return box_w*a_h/(box_w*box_h + (a_w-box_w)*a_h)
    if a_w <= box_w and a_h >= box_h:
      # anchor is narrower but taller: overlap is a_w x box_h
      return a_w*box_h/(box_w*box_h + a_w*(a_h-box_h))
    # box fully contains the anchor
    return (a_w*a_h)/(box_w*box_h)

  return np.array([_overlap(anchor) for anchor in centroids])

In [0]:
def avg_IOU(anns, centroids):
  '''
  mean, over all annotation boxes, of the best iou each box reaches
  against any anchor
  inputs -
    anns - 2-D array, one (w, h) row per annotation box
    centroids - anchors, e.g. the output of run_kmeans
  outputs -
    float - sum of per-box best ious divided by the number of boxes
  '''
  box_count, _ = anns.shape

  # accumulate left to right, starting from a float zero
  best_total = sum((max(IOU(box, centroids)) for box in anns), 0.)

  return best_total/box_count

In [0]:
def print_anchors(centroids, save_dir=None):
  '''
  prints the anchors sorted by width and saves them to 'anchors_.npy'
  inputs -
    centroids - 2-D array-like, one (w, h) row per anchor
    save_dir - folder to write 'anchors_.npy' into; when None the
      notebook-level global `path` is used (the original behaviour)
  outputs -
    None; prints one line "anchors: [w,h, w,h, ...]" and writes the
    anchors in their original, unsorted row order
  '''
  anchors = np.array(centroids)  # np.array copies, so the caller's data is untouched

  # order anchors by ascending width for display only
  sorted_indices = np.argsort(anchors[:, 0])

  # join() puts ", " between anchors and nothing after the last one; each
  # anchor is indexed with a scalar so '%0.2f' receives a scalar, not a
  # 1-element array
  formatted = ', '.join(
    '%0.2f,%0.2f' % (anchors[i, 0], anchors[i, 1]) for i in sorted_indices
  )
  print("anchors: [" + formatted + "]")

  # save anchors to file
  if save_dir is None:
    save_dir = path  # using global variable path
  np.save(os.path.join(save_dir, 'anchors_.npy'), anchors)

In [0]:
def run_kmeans(ann_dims, anchor_num):
  '''
  generates anchor boxes with k-means, using (1 - IOU) as the distance
  inputs - 
    ann_dims - 2-D array with one (w, h) row per annotation box
    anchor_num - desired number of anchors
  outputs - 
    centroids - float array of shape (anchor_num, 2) with the computed anchors
  raises -
    ValueError - if ann_dims has no rows
  notes -
    initial centroids are drawn with the `random` module, so results vary
    between runs unless the caller seeds it
  '''
  # float copy: integer input would otherwise truncate the centroid means
  ann_dims = np.asarray(ann_dims, dtype=float)
  ann_num = ann_dims.shape[0]
  if ann_num == 0:
    raise ValueError('run_kmeans needs at least one annotation box')

  # seed centroids from distinct boxes where possible; drawing with
  # replacement can pick the same box twice and leave a cluster empty
  if anchor_num <= ann_num:
    indices = random.sample(range(ann_num), anchor_num)
  else:
    indices = [random.randrange(ann_num) for _ in range(anchor_num)]
  centroids = ann_dims[indices]  # fancy indexing returns a new array

  prev_assignments = None
  old_distances = np.zeros((ann_num, anchor_num))
  iteration = 0

  while True:
    iteration += 1
    # distances.shape = (ann_num, anchor_num)
    distances = np.array([1 - IOU(ann_dims[i], centroids) for i in range(ann_num)])

    print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances-distances))))

    # assign samples to centroids
    assignments = np.argmin(distances, axis=1)

    # converged once no box changes cluster
    if prev_assignments is not None and np.array_equal(assignments, prev_assignments):
      return centroids

    # calculate new centroids as the mean (w, h) of each cluster
    for j in range(anchor_num):
      members = ann_dims[assignments == j]
      # an empty cluster keeps its previous centroid instead of collapsing
      # to (0, 0); the guard also makes the old 1e-6 denominator fudge
      # unnecessary, so the mean is exact
      if len(members) > 0:
        centroids[j] = members.mean(axis=0)

    prev_assignments = assignments
    old_distances = distances

In [0]:
def datagen(mlist):
  '''
  collects the width and height of every annotated box, expressed in grid
  cells (image size divided by the configured grid counts)
  inputs - 
    mlist - list of dicts with 'width', 'height' and an 'object' list whose
      entries carry 'xmin', 'xmax', 'ymin', 'ymax'
  outputs - 
    wh - np array with one (w, h) row per box, over all images
  global variable used - 
    config - dict providing 'GRID_W' and 'GRID_H'
  '''
  def _boxes_in_cells(record):
    # size of one grid cell in this image's pixel units
    cell_w = float(record['width'])/float(config['GRID_W'])
    cell_h = float(record['height'])/float(config['GRID_H'])
    for box in record['object']:
      yield (
        (float(box['xmax']) - float(box['xmin']))/cell_w,
        (float(box['ymax']) - float(box['ymin']))/cell_h,
      )

  wh = np.array([dims for record in mlist for dims in _boxes_in_cells(record)])
  print('wh shape ={}'.format(wh.shape))
  print('data ready for anchor generation')
  return wh

In [0]:
def main(num_anchors):
  '''
  end-to-end anchor generation:
    1. build the (w, h) array from the global train_images via datagen
    2. cluster it into num_anchors anchors with run_kmeans
    3. print the average IOU and the anchors (print_anchors also saves them)
  '''
  box_dims = datagen(train_images)
  anchors = run_kmeans(box_dims, num_anchors)

  mean_iou = avg_IOU(box_dims, anchors)
  print('\naverage IOU for', num_anchors, 'anchors:', '{}'.format(mean_iou))
  print_anchors(anchors)

In [11]:
# Run the whole pipeline for 5 anchors; progress of each k-means iteration is printed below.
main(num_anchors = 5)

wh shape =(632, 2)
data ready for anchor generation
iteration 1: dists = 1944.982598754461
iteration 2: dists = 155.81938587066549
iteration 3: dists = 70.91337830372696
iteration 4: dists = 45.609289218194505
iteration 5: dists = 37.33483060041165
iteration 6: dists = 35.121263744262045
iteration 7: dists = 29.53644971671598
iteration 8: dists = 33.558983327252726
iteration 9: dists = 32.98399108068922
iteration 10: dists = 23.588184485687393
iteration 11: dists = 19.454777720382626
iteration 12: dists = 25.2295646923385
iteration 13: dists = 22.794049758235523
iteration 14: dists = 20.395780322165546
iteration 15: dists = 21.492252645606293
iteration 16: dists = 29.099341284165057
iteration 17: dists = 28.473443495196292
iteration 18: dists = 28.072632728828793
iteration 19: dists = 29.591832630646017
iteration 20: dists = 24.004301170722513
iteration 21: dists = 20.814994863882777
iteration 22: dists = 14.478811612710977
iteration 23: dists = 15.375808672412806
iteration 24: dists =