#### This module preprocesses the data to create the regression and classification labels used by the Region Proposing Network

In [6]:
import numpy as np

In [7]:
# y1 = y start, y2 = y finish
# groundtruth_x..y are single values
# anchor x..y are (17,17) arrays

SCALE = 600/17
OUT_LEN = 17

def get_all_anchors():
    for i in range(17):
        for j in range(17):
            for r, ratio in enumerate(((1, 1), (1, 2), (2, 1))):
                for s, size in enumerate((128, 256, 512)):
                    
                    anchors_x1 = i * scale
                    anchors_x2 = anchors_x1 + ratio[1] * size            
                    
                    anchors_y1 = j * scale
                    anchors_y2 = anchors_y1 + ratio[0] * size
                    
                    yield anchor_y1, anchor_x1, anchor_y2, anchor_x2, i, j, r, s
                    
def get_box_extra(y1, x1, y2, x2):
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2
    width = x2 - x1
    height = y2 - y1  
    return center_x, center_y, width, height
    
# these 4 values are the output of the regression layer
def generate_distance_between_groundtruth_and_anchor(
        groundtruth_center_x, groundtruth_center_y, groundtruth_width, groundtruth_height,
        anchor_center_x, anchor_center_y, anchor_width, anchor_height):
    
    tmp = np.zeros((OUT_LEN, OUT_LEN, 4))
    tmp[:,:,0] = (groundtruth_center_x - anchor_center_x) / anchor_width # t_x
    tmp[:,:,1] = (groundtruth_center_y - anchor_center_y) / anchor_height # t_y
    tmp[:,:,2] = np.log(groundtruth_width / anchor_width) * np.ones((OUT_LEN, OUT_LEN)) # t_w
    tmp[:,:,3] = np.log(groundtruth_height / anchor_height) * np.ones((OUT_LEN, OUT_LEN)) # t_h
    
    return tmp

def anchor_and_distance_to_groundtruth(anchor_y1, anchor_x1, anchor_y2, anchor_x2, distance):
    t_x, t_y, t_w, t_h  = distance
    
    anchor_center_x, anchor_center_y, anchor_width, anchor_height = get_box_extra(
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2)
    
    groundtruth_x1 = anchor_center_x + t_x * anchor_box_width
    groundtruth_y1 = anchor_center_x + t_y * anchor_box_height
    groundtruth_width = anchor_box_width * np.e ** t_w
    groundtruth_height = anchor_box_height * np.e ** t_h
    
    return groundtruth_x1, groundtruth_y1, groundtruth_width, groundtruth_height
        


# It computes the distance between each anchor and the closest groundtruth-truth box
def prepare_regression_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                              anchor_y1, anchor_x1, anchor_y2, anchor_x2): 
    
    groundtruth_center_x, groundtruth_center_y, groundtruth_width, groundtruth_height = get_box_extra(
                                    groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2)
    
    anchor_center_x, anchor_center_y, anchor_width, anchor_height = get_box_extra(
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2)
    
    return generate_distance_between_groundtruth_and_anchor(
        groundtruth_center_x, groundtruth_center_y, groundtruth_width, groundtruth_height,
        anchor_center_x, anchor_center_y, anchor_width, anchor_height
    )

def prepare_regression_values_test():    
    anchor_x1 = np.array([[0]])
    anchor_y1 = np.array([[0]])
    anchor_x2 = np.array([[1]])    
    anchor_y2 = np.array([[1]])
    
    groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2 = [1, 1, 2, 2]
    
    tmp = prepare_regression_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2)
    
    assert np.array_equal(tmp, np.array([[[1, 1, 0, 0]]]))
        
prepare_regression_values_test()

AssertionError: 

In [5]:
# groundtruth_x..y are single values
# anchor x..y are (17,17) arrays
def prepare_classification_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                              anchor_y1, anchor_x1, anchor_y2, anchor_x2):
    # determine the (x, y)-coordinates of the intersection rectangle
    ones = np.ones((OUT_LEN, OUT_LEN))
    x1 = ones * np.maximum(groundtruth_x1, anchor_x1)
    y1 = ones * np.maximum(groundtruth_y1, anchor_y1)
    x2 = ones * np.minimum(groundtruth_x2, anchor_x2)
    y2 = ones * np.minimum(groundtruth_y2, anchor_y2) 
    
    intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    
    groundtruth_area = (groundtruth_x2 - groundtruth_x1) * (groundtruth_y2 - groundtruth_y1)
    anchor_area = (anchor_x2 - anchor_x1) * (anchor_y2 - anchor_y1)
    
    iou = intersection_area / (groundtruth_area + anchor_area - intersection_area)    
    return iou   

def prepare_classification_values_test():
    anchor_x1 = np.array([[1]])
    anchor_y1 = np.array([[1]])
    anchor_x2 = np.array([[3]])    
    anchor_y2 = np.array([[3]])
    
    groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2 = [0, 0, 2, 2]
    
    tmp = prepare_classification_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2)
    
    assert -0.001 < tmp[0][0] - (1/7) < 0.001
    
def prepare_classification_values_test_adhoc():
    anchor_x1 = np.array([[1]])
    anchor_y1 = np.array([[1]])
    anchor_x2 = np.array([[10]])    
    anchor_y2 = np.array([[10]])
    
    groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2 = [1, 1, 3, 3]
    
    tmp = prepare_classification_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2)
    
    print(tmp)
    
prepare_classification_values_test()
prepare_classification_values_test_adhoc()

[[0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272]
 [0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272]
 [0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272]
 [0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272]
 [0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272 0.04938272
  0.04938272 0.04938272 0.04938272 0.04938272 0.04938272]
 [0.04938272 0.04938272 0.0493

AssertionError: 

In [None]:
def overwrite_anchors_distances_closer_to_object(y_regr, curr_dist, pos):
    t_new = np.sum(curr_dist, axis = 2)
    t_old = np.sum(y_regr[:,:,:,pos], axis = 2)
    for i in range(17):
        for j in range(17):
            # only overwrite the anchors that are closer to the current groundtruth truth object,
            # or that haven't been initialized
            if t_old[i, j] == -4 or t_old[i, j] > t_new[i, j]:
                y_regr[i,j,:,pos] = curr_dist[i, j, :]
                    
def overwrite_anchors_iou_closer_to_object(y_class, new_iou, pos):
    for i in range(17):
        for j in range(17):
            # only overwrite the anchors that are closer to the current groundtruth truth object,
            # or that haven't been initialized
            if y_class[i, j, pos] == -1 or y_class[i, j, pos] < new_iou[i, j]:
                y_class[i,j, pos] = new_iou[i, j]    

def prepare_output_values(row_dict):
    # output of last regression layer per image: (17, 17, 36) 
    # 17 anchors and 4 (dimensions) * 9 (scales & sizes)

    y_regr = np.zeros((17,17,4,9)) - 1
    y_class = np.zeros((17,17,9)) - 1
    scale = 600/17
        
    for anchor_y1, anchor_x1, anchor_y2, anchor_x2, _, _, _, _ in get_all_anchors():
                
            for obj in row_dict['objects']['bbox']:
                
                groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2 = bbox_perc_to_pixels(obj)
                
                curr_dist = prepare_regression_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                                                anchors_y1, anchors_x1, anchors_y2, anchors_x2)
                #overwrite_anchors_distances_closer_to_object(y_regr, curr_dist, r*3+s)
                
                iou=prepare_classification_values(groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2,
                                                  anchors_y1, anchors_x1, anchors_y2, anchors_x2)
                overwrite_anchors_iou_closer_to_object(y_class, iou, r*3+s)
                                
    return y_regr, y_class

regression_values_dataset = np.zeros((TRAIN_SIZE,17,17,4,9))
classification_values_dataset = np.zeros((TRAIN_SIZE,17,17,9))

for i, row in enumerate(train):
    regression_values_dataset[i], classification_values_dataset[i] = prepare_output_values(row)
regression_values_dataset = regression_values_dataset.reshape((TRAIN_SIZE,17,17,36))

In [None]:
#MIN = regression_values_dataset.min()
#regression_values_dataset += (0-MIN)

#NORMALIZED_VALUE = regression_values_dataset.max()
#regression_values_dataset = regression_values_dataset / NORMALIZED_VALUE

In [None]:
np.save('regression_values_dataset.npy', regression_values_dataset)
np.save('classification_values_dataset.npy', classification_values_dataset)