#### This module preprocesses the data to create the regression and classification labels used by the Region Proposal Network (RPN).

In [23]:
import numpy as np

# Spacing between neighbouring anchor centres: create_anchors_map multiplies
# a grid index by this value to obtain the centre coordinate.
# NOTE(review): 600 / 18 presumably relates the input image size to the
# feature-map size - confirm against the network definition.
SCALE = 600/18
# Presumably the side length of the anchor grid (the grid built below is
# 17 x 17) - TODO confirm.
OUT_LEN = 17

def get_box_extra(y1, x1, y2, x2):
    """Describe a corner-encoded box by its centre and its size.

    Note that the arguments are ordered y-first while the result is ordered
    x-first.

    Args:
        y1, x1: Coordinates of the first corner.
        y2, x2: Coordinates of the opposite corner.

    Returns:
        Tuple ``(center_x, center_y, width, height)``. Both centres are
        computed with floor division, so any fractional half is rounded down.
    """
    box_width = x2 - x1
    box_height = y2 - y1
    mid_x = (x1 + x2) // 2
    mid_y = (y1 + y2) // 2
    return mid_x, mid_y, box_width, box_height

def create_anchors_map(grid_size=None, stride=None):
    """Build the grid of anchor boxes used to label the RPN outputs.

    Every grid cell ``(i, j)`` receives nine anchors (three aspect ratios
    times three sizes), all centred on ``(x, y) = (i * stride, j * stride)``.
    Called without arguments it produces the same array as before.

    Args:
        grid_size: Number of cells along each of the two grid axes.
            Defaults to the module-level ``OUT_LEN``.
        stride: Distance between neighbouring anchor centres. Defaults to
            the module-level ``SCALE``.

    Returns:
        Structured NumPy array of shape ``(grid_size, grid_size, 3, 3)`` with
        int32 fields ``y1``, ``x1``, ``y2``, ``x2``. Axis 0 indexes the x
        position, axis 1 the y position, axis 2 the aspect ratio and axis 3
        the size. Coordinates are computed as floats and lose their
        fractional part when stored in the integer fields.
    """
    if grid_size is None:
        grid_size = OUT_LEN
    if stride is None:
        stride = SCALE

    # Each ratio is (height factor, width factor) applied to the base size.
    ratios = ((1, 1), (0.75, 1.5), (1.5, 0.75))
    sizes = (128, 256, 512)
    corner_dtype = [('y1', 'i4'), ('x1', 'i4'), ('y2', 'i4'), ('x2', 'i4')]

    anchors = np.zeros((grid_size, grid_size, len(ratios), len(sizes)),
                       dtype=corner_dtype)
    for i in range(grid_size):
        center_x = i * stride
        for j in range(grid_size):
            center_y = j * stride
            for r, (height_factor, width_factor) in enumerate(ratios):
                for s, size in enumerate(sizes):
                    half_width = width_factor * size / 2
                    half_height = height_factor * size / 2
                    anchors[i, j, r, s] = (
                        center_y - half_height,
                        center_x - half_width,
                        center_y + half_height,
                        center_x + half_width,
                    )
    return anchors
                        
# Build the anchor grid once when the module is loaded; prepare_output_values
# reads this module-level array.
anchors_map = create_anchors_map()

In [3]:
def prepare_output_values(row_dict):
    """Create the RPN regression and classification targets for one sample.

    The targets are laid out per anchor, i.e. with the default 17 x 17 grid
    and 3 x 3 anchors per cell the regression target holds 17 * 17 * 9
    four-component offsets (the network's (17, 17, 36) regression output).

    Args:
        row_dict: Sample record; ``row_dict['objects']['bbox']`` must iterate
            over the ground-truth boxes. Each box is converted with
            ``bbox_perc_to_pixels`` into ``(y1, x1, y2, x2)``.

    Returns:
        Tuple ``(y_regr, y_class)``:

        * ``y_regr``: float array of shape ``anchors_map.shape + (4,)`` with
          ``(t_x, t_y, t_w, t_h)`` per anchor, taken from the ground-truth
          box whose ``|t_x| + |t_y| + |t_w| + |t_h|`` is smallest. Anchors
          for which no box beats the initial value keep 100 in all four
          components.
        * ``y_class``: float array of shape ``anchors_map.shape`` with the
          highest intersection-over-union between each anchor and any
          ground-truth box (0 when there are no boxes).
    """
    # Anchor geometry is the same for every ground-truth box, so derive it
    # once instead of once per object.
    anchor_center_x = (anchors_map['x1'] + anchors_map['x2']) // 2
    anchor_center_y = (anchors_map['y1'] + anchors_map['y2']) // 2
    anchor_width = anchors_map['x2'] - anchors_map['x1']
    anchor_height = anchors_map['y2'] - anchors_map['y1']
    anchor_area = anchor_width * anchor_height

    # 100 per component acts as a "no box assigned yet" sentinel that any
    # reasonably close ground-truth box will beat.
    y_regr = np.zeros(anchors_map.shape + (4,)) + 100
    y_class = np.zeros(anchors_map.shape)

    for obj in row_dict['objects']['bbox']:
        groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2 = bbox_perc_to_pixels(obj)
        (groundtruth_center_x, groundtruth_center_y,
         groundtruth_width, groundtruth_height) = get_box_extra(
            groundtruth_y1, groundtruth_x1, groundtruth_y2, groundtruth_x2)

        ###################
        ## REGRESSION
        current_r = np.zeros(y_regr.shape)
        current_r[..., 0] = (groundtruth_center_x - anchor_center_x) / anchor_width  # t_x
        current_r[..., 1] = (groundtruth_center_y - anchor_center_y) / anchor_height  # t_y
        current_r[..., 2] = np.log(groundtruth_width / anchor_width)  # t_w
        current_r[..., 3] = np.log(groundtruth_height / anchor_height)  # t_h

        # Keep, per anchor, the offsets of the closest ground-truth box,
        # where "closer" means a smaller sum of |t_x|, |t_y|, |t_w|, |t_h|.
        # The mask covers every axis except the last one, so indexing with
        # it selects whole 4-component vectors and all four offsets of an
        # anchor are replaced together.
        is_closer = np.sum(np.abs(current_r), axis=-1) < np.sum(np.abs(y_regr), axis=-1)
        y_regr[is_closer] = current_r[is_closer]

        ###################
        ## CLASSIFICATION
        x1 = np.maximum(groundtruth_x1, anchors_map['x1'])
        y1 = np.maximum(groundtruth_y1, anchors_map['y1'])
        x2 = np.minimum(groundtruth_x2, anchors_map['x2'])
        y2 = np.minimum(groundtruth_y2, anchors_map['y2'])
        # Clamp at zero so that non-overlapping boxes yield an empty
        # intersection instead of a negative extent.
        intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)

        # Intersection over Union
        groundtruth_area = (groundtruth_x2 - groundtruth_x1) * (groundtruth_y2 - groundtruth_y1)
        current_iou = intersection_area / (groundtruth_area + anchor_area - intersection_area)

        # Keep the best IoU seen so far for every anchor.
        y_class = np.maximum(y_class, current_iou)

    return y_regr, y_class


def anchor_and_distance_to_groundtruth(anchor_y1, anchor_x1, anchor_y2, anchor_x2, distance):
    """Decode regression offsets back into a box, relative to one anchor.

    Args:
        anchor_y1, anchor_x1, anchor_y2, anchor_x2: Corner coordinates of
            the anchor.
        distance: Sequence ``(t_x, t_y, t_w, t_h)`` of offsets relative to
            that anchor.

    Returns:
        Tuple ``(x, y, width, height)`` where ``x`` and ``y`` are the decoded
        centre minus half the decoded width and height respectively. Note
        that this is x-first and size-based, unlike the ``(y1, x1, y2, x2)``
        corner order of the arguments.
    """
    t_x, t_y, t_w, t_h = distance
    center_x, center_y, width, height = get_box_extra(
        anchor_y1, anchor_x1, anchor_y2, anchor_x2)

    # Invert the log-scale size offsets first, then shift the centre.
    decoded_width = width * np.e ** t_w
    decoded_height = height * np.e ** t_h
    decoded_center_x = center_x + t_x * width
    decoded_center_y = center_y + t_y * height

    corner_x = decoded_center_x - decoded_width / 2
    corner_y = decoded_center_y - decoded_height / 2
    return corner_x, corner_y, decoded_width, decoded_height