#### This module preprocesses the data to create the regression and classification labels used by the Region Proposal Network (RPN)

In [None]:
# y1 = y start, y2 = y finish
# ground_x..y are single values
# anchor x..y are (16,16) arrays
def prepare_regression_values(ground_y1, ground_x1, ground_y2, ground_x2,
                              anchor_y1, anchor_x1, anchor_y2, anchor_x2, out_len=16): 
    groundtruth_box_center_x = (ground_x1 + ground_x2) // 2
    groundtruth_box_center_y = (ground_y1 + ground_y2) // 2
    groundtruth_box_width = ground_x2 - ground_x1
    groundtruth_box_height = ground_y2 - ground_y1

    anchor_box_center_x = (anchor_x1 + anchor_x2) // 2
    anchor_box_center_y = (anchor_y1 + anchor_y2) // 2
    anchor_box_width = anchor_x2 - anchor_x1
    anchor_box_height = anchor_y2 - anchor_y1
    
    # these 4 values are the output of the regression layer
    tmp = np.zeros((out_len, out_len, 4))
    tmp[:,:,0] = (groundtruth_box_center_x - anchor_box_center_x) / anchor_box_width # t_x
    tmp[:,:,1] = (groundtruth_box_center_y - anchor_box_center_y) / anchor_box_height # t_y
    tmp[:,:,2] = np.log(groundtruth_box_width / anchor_box_width) * np.ones((out_len, out_len)) # t_w
    tmp[:,:,3] = np.log(groundtruth_box_height / anchor_box_height) * np.ones((out_len, out_len)) # t_h
    
    return tmp

def prepare_regression_values_test():    
    anchor_x1 = np.array([[0]])
    anchor_y1 = np.array([[0]])
    anchor_x2 = np.array([[1]])    
    anchor_y2 = np.array([[1]])
    
    ground_y1, ground_x1, ground_y2, ground_x2 = [1, 1, 2, 2]
    
    tmp = prepare_regression_values(ground_y1, ground_x1, ground_y2, ground_x2,
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2, out_len=1)
    
    assert np.array_equal(tmp, np.array([[[1, 1, 0, 0]]]))
        
prepare_regression_values_test()

In [None]:
# ground_x..y are single values
# anchor x..y are (16,16) arrays
def prepare_classification_values(ground_y1, ground_x1, ground_y2, ground_x2,
                              anchor_y1, anchor_x1, anchor_y2, anchor_x2, out_len=16):
    # determine the (x, y)-coordinates of the intersection rectangle
    ones = np.ones((out_len, out_len))
    x1 = np.maximum(ones * ground_x1, anchor_x1)
    y1 = np.maximum(ones * ground_y1, anchor_y1)
    x2 = np.minimum(ones * ground_x2, anchor_x2)
    y2 = np.minimum(ones * ground_y2, anchor_y2) 
    intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    
    ground_area = (ground_x2 - ground_x1) * (ground_y2 - ground_y1)
    anchor_area = (anchor_x2 - anchor_x1) * (anchor_y2 - anchor_y1)

    iou = intersection_area / (ground_area + anchor_area - intersection_area)    
    return iou   

def prepare_classification_values_test():
    anchor_x1 = np.array([[1]])
    anchor_y1 = np.array([[1]])
    anchor_x2 = np.array([[3]])    
    anchor_y2 = np.array([[3]])
    
    ground_y1, ground_x1, ground_y2, ground_x2 = [0, 0, 2, 2]
    
    tmp = prepare_classification_values(ground_y1, ground_x1, ground_y2, ground_x2,
                                    anchor_y1, anchor_x1, anchor_y2, anchor_x2, out_len=1)
    
    assert -0.001 < tmp[0][0] - (1/7) < 0.001
    
prepare_classification_values_test()

In [None]:
def overwrite_anchors_distances_closer_to_object(y_regr, curr_dist, pos):
    t_new = np.sum(curr_dist, axis = 2)
    t_old = np.sum(y_regr[:,:,:,pos], axis = 2)
    for i in range(16):
        for j in range(16):
            # only overwrite the anchors that are closer to the current ground truth object,
            # or that haven't been initialized
            if t_old[i, j] == -4 or t_old[i, j] > t_new[i, j]:
                y_regr[i,j,:,pos] = curr_dist[i, j, :]
                    
def overwrite_anchors_iou_closer_to_object(y_class, new_iou, pos):
    for i in range(16):
        for j in range(16):
            # only overwrite the anchors that are closer to the current ground truth object,
            # or that haven't been initialized
            if y_class[i, j, pos] == -1 or y_class[i, j, pos] < new_iou[i, j]:
                y_class[i,j, pos] = new_iou[i, j]    

def prepare_output_values(row_dict):
    # output of last regression layer per image: (16, 16, 36) 
    # 16 anchors and 4 (dimensions) * 9 (scales & sizes)

    y_regr = np.zeros((16,16,4,9)) - 1
    y_class = np.zeros((16,16,9)) - 1
    scale = 600/16
        
    for r, ratio in enumerate(((1, 1), (1, 2), (2, 1))):
        for s, size in enumerate((128, 256, 512)):
            
            anchors_x1 = np.ones((16,16)) * np.arange(16) * scale
            anchors_x2 = anchors_x1 + ratio[1] * size
            
            anchors_y1 = np.ones((16,16)) * np.arange(16) * scale
            anchors_y2 = anchors_y1 + ratio[0] * size
                
            for obj in row_dict['objects']['bbox']:
                
                ground_y1, ground_x1, ground_y2, ground_x2 = bbox_perc_to_pixels(obj)
                
                curr_dist = prepare_regression_values(ground_y1, ground_x1, ground_y2, ground_x2,
                                                anchors_y1, anchors_x1, anchors_y2, anchors_x2)
                overwrite_anchors_distances_closer_to_object(y_regr, curr_dist, r*3+s)
                
                iou=prepare_classification_values(ground_y1, ground_x1, ground_y2, ground_x2,
                                                  anchors_y1, anchors_x1, anchors_y2, anchors_x2)
                overwrite_anchors_iou_closer_to_object(y_class, iou, r*3+s)
                                
    return y_regr, y_class

regression_values_dataset = np.zeros((TRAIN_SIZE,16,16,4,9))
classification_values_dataset = np.zeros((TRAIN_SIZE,16,16,9))

for i, row in enumerate(train):
    regression_values_dataset[i], classification_values_dataset[i] = prepare_output_values(row)
regression_values_dataset = regression_values_dataset.reshape((TRAIN_SIZE,16,16,36))

In [None]:
MIN = regression_values_dataset.min()
regression_values_dataset += (0-MIN)

NORMALIZED_VALUE = regression_values_dataset.max()
regression_values_dataset = regression_values_dataset / NORMALIZED_VALUE

In [None]:
np.count_nonzero(classification_values_dataset.reshape(-1) > 0.7)