In [1]:
import numpy as np
import math

In [2]:
def generate_anchor_boxes_scales(num_conv_layers=2):
    """Build the pair of anchor-box scales used by each added convolutional layer.

    (num_conv_layers + 1) boundary values are spread evenly over [0.2, 0.9]. Every layer i
    receives a pair [s_i, sqrt(s_i * s_(i+1))]: its own boundary value together with the
    geometric mean of that value and the next boundary value. The last boundary value is
    only ever used inside a geometric mean.

    Parameters:
                num_conv_layers (int) : Number of convolutional layers added to the top of pretrained
                                        convolutional base
    Returning:
                different_scales (list) : One [scale, intermediate_scale] pair per layer
    """

    #With the default of 2 layers the boundaries are 0.2, 0.55 and 0.9, which gives the two pairs
    #[0.2, sqrt(0.2*0.55)] and [0.55, sqrt(0.55*0.9)]
    boundaries = np.linspace(0.2,0.9,(num_conv_layers+1))

    #Walk over neighbouring boundaries (s_i, s_(i+1)) and emit one pair per layer
    return [[lower,math.sqrt(lower*upper)]
            for lower,upper in zip(boundaries[:-1],boundaries[1:])]

In [3]:
def generate_anchor_boxes(feature_map_shape,frame_shape,multiscale_index=0,num_conv_layers=2,
                         aspect_ratios=[1,2,0.5]):
    """This function generates anchor boxes on different aspect ratios for a given scale, defined by the
    size of the feature map output by a specific convolutional layer of the network.

    One anchor box is produced per aspect ratio, plus one extra square box whose size lies between
    this layer's scale and the next layer's scale. Every box is centred on a feature map cell.

    Parameters:
                feature_map_shape(list or a tuple) : Shape of the feature map as a result of output from
                                                     a convolutional layer, as (height, width, channels)
                frame_shape (list or tuple) : Shape of the frame which is given as an input to the network,
                                              as (height, width, channels)
                multiscale_index (int) : For which layer or scale of the convolutional layer, anchor boxes
                                         need to be fetched
                num_conv_layers (int) : Number of convolutional layers on the top of pretrained network
                aspect_ratios (list) : Aspect ratios (width / height) which need to be taken into
                                       consideration for a specific scale of anchor boxes. The default
                                       list is only read, never modified.
    Returning:
                generated_anchor_boxes (tensor) : Array of shape (feature_map_height, feature_map_width,
                                                  len(aspect_ratios) + 1, 4) holding (cx, cy, width, height)
                                                  for every anchor box, in the units of frame_shape
    """
    #Each layer owns a pair of scales: the base scale and the intermediate scale used only for the
    #extra aspect-ratio-1 box
    different_scales = generate_anchor_boxes_scales(num_conv_layers)
    base_scale,extra_scale = different_scales[multiscale_index]

    #One box per aspect ratio plus the extra square box
    boxes_per_cell = len(aspect_ratios) + 1

    frame_height,frame_width,_ = frame_shape

    feature_map_height,feature_map_width,_ = feature_map_shape

    #Frame dimensions shrunk to this layer's base scale
    scaled_height = frame_height * base_scale
    scaled_width = frame_width * base_scale

    #Stretching the width by sqrt(ratio) and shrinking the height by the same factor keeps the box
    #area constant while giving it the requested width / height ratio
    box_sizes = [[scaled_width * math.sqrt(aspect_ratio),scaled_height / math.sqrt(aspect_ratio)]
                 for aspect_ratio in aspect_ratios]

    #The extra box is scaled from the full frame dimensions: multiplying the already scaled
    #dimensions would apply the base scale a second time and make this box smaller than the base box
    box_sizes.append([frame_width * extra_scale,frame_height * extra_scale])

    box_sizes = np.array(box_sizes)

    #Size of the frame region covered by one feature map cell
    cell_width = frame_width / feature_map_width
    cell_height = frame_height / feature_map_height

    #Centres of the first and last cell along x, then every centre in between at cell_width spacing
    first_cx = cell_width * 0.5
    last_cx = (feature_map_width * cell_width) - (0.5 * cell_width)
    cx = np.linspace(first_cx,last_cx,feature_map_width)

    #Same along y
    first_cy = cell_height * 0.5
    last_cy = (feature_map_height * cell_height) - (0.5 * cell_height)
    cy = np.linspace(first_cy,last_cy,feature_map_height)

    #meshgrid returns grids of shape (feature_map_height, feature_map_width): rows follow y and
    #columns follow x, so the output tensor has to be laid out height-first as well
    cx_grid, cy_grid = np.meshgrid(cx,cy)

    generated_anchor_boxes = np.zeros((feature_map_height,feature_map_width,boxes_per_cell,4))

    #The trailing axis added here broadcasts each cell centre over all the boxes of that cell
    generated_anchor_boxes[...,0] = cx_grid[...,np.newaxis]
    generated_anchor_boxes[...,1] = cy_grid[...,np.newaxis]
    #Box sizes do not depend on the cell, so they broadcast over the whole grid
    generated_anchor_boxes[...,2] = box_sizes[:,0]
    generated_anchor_boxes[...,3] = box_sizes[:,1]

    return generated_anchor_boxes

In [5]:
def corner_to_center(corner_coordinates):

    """This function will convert corner format into center format.
    That is from (xmin,xmax,ymin,ymax) to (cx,cy,w,h)

    The four coordinates are read from the last axis, so the input may have any number of leading
    axes. Entries of the last axis beyond the first four are copied through unchanged. The input
    is never modified.

    Parameters:
                corner_coordinates (tensor) : Coordinates of boxes in corner format

    Returning:
                center_coordinates (tensor) : Coordinates of boxes in center format, as a new float array
    """
    corners = np.asarray(corner_coordinates)

    #astype always returns a fresh array here. np.float64 is used because the np.float alias
    #no longer exists in current NumPy releases.
    center_coordinates = corners.astype(np.float64)

    xmin,xmax = corners[...,0],corners[...,1]
    ymin,ymax = corners[...,2],corners[...,3]

    #Centre = lower edge + half of the extent
    center_coordinates[...,0] = xmin + 0.5 * (xmax - xmin)
    center_coordinates[...,1] = ymin + 0.5 * (ymax - ymin)

    #Width and height are the extents along x and y
    center_coordinates[...,2] = xmax - xmin
    center_coordinates[...,3] = ymax - ymin

    return center_coordinates

In [6]:
def center_to_corner(center_coordinates):

    """This function will convert center format into corner format.
    That is from (cx,cy,w,h) to (xmin,xmax,ymin,ymax)

    The four coordinates are read from the last axis, so the input may have any number of leading
    axes. Entries of the last axis beyond the first four are copied through unchanged. The input
    is never modified.

    Parameters:
                center_coordinates (tensor) : Coordinates of boxes in center format

    Returning:
                corner_coordinates (tensor) : Coordinates of boxes in corner format, as a new float array
    """

    centers = np.asarray(center_coordinates)

    #astype always returns a fresh array here. np.float64 is used because the np.float alias
    #no longer exists in current NumPy releases.
    corner_coordinates = centers.astype(np.float64)

    cx,cy = centers[...,0],centers[...,1]
    half_width = 0.5 * centers[...,2]
    half_height = 0.5 * centers[...,3]

    #Each edge sits half an extent away from the centre
    corner_coordinates[...,0] = cx - half_width
    corner_coordinates[...,1] = cx + half_width
    corner_coordinates[...,2] = cy - half_height
    corner_coordinates[...,3] = cy + half_height

    return corner_coordinates

In [4]:
def _corner_rows_to_center_rows(corner_rows):
    """Convert (xmin,xmax,ymin,ymax,...) entries on the last axis into (cx,cy,w,h,...), on a float copy."""
    corner_rows = np.asarray(corner_rows)
    center_rows = corner_rows.astype(np.float64)

    xmin,xmax = corner_rows[...,0],corner_rows[...,1]
    ymin,ymax = corner_rows[...,2],corner_rows[...,3]

    center_rows[...,0] = xmin + 0.5 * (xmax - xmin)
    center_rows[...,1] = ymin + 0.5 * (ymax - ymin)
    center_rows[...,2] = xmax - xmin
    center_rows[...,3] = ymax - ymin

    return center_rows


def fetch_all_positive_anchor_boxes(iou,num_unique_categories,anchor_boxes,gt_info,is_normalize,
                                   iou_threshold):
    """A function to fetch all the positive anchor boxes which are having good amount of overlap 
    with ground truth bounding boxes inside the frame(image). This function will calculate the normalized 
    (if is_normalize == True) offsets for all the positive anchor boxes as well as, it will also assign
    the categories to all the positive anchor boxes which will be the categories of ground truth bounding
    boxes.

    Parameters:
                iou (tensor) : IoU of each anchor box with each Ground Truth bounding box, one row per
                               anchor box and one column per ground truth bounding box
                num_unique_categories (int) : Total number of categories in training data. Column 0 of
                                              the returned categories marks "no object assigned".
                anchor_boxes (tensor) : anchor boxes coordinates per feature map, one row of
                                        (xmin,xmax,ymin,ymax) per row of iou
                gt_info (tensor) : Coordinates as well as class labels of objects present in the image,
                                   one row of (xmin,xmax,ymin,ymax,class label) per column of iou
                is_normalize (bool) : Whether to use normalization on the offsets calculated for positive
                                      anchor boxes or not.
                iou_threshold (float) : If this value is less than 1 then the function will go for the second
                                        round to find out extra positive anchor boxes apart from the ones
                                        which it has already found out which were having maximum iou with 
                                        the ground truth bounding boxes.
    Returning:
                positive_anchor_boxes_categories (tensor) : One-hot categories of shape
                                                        (number of anchor boxes, num_unique_categories)
                positive_anchor_boxes_offsets (tensor) : Offsets of shape (number of anchor boxes, 4). Rows
                                                    of anchor boxes which are not positive stay at zero.
    """
    iou = np.asarray(iou)
    anchor_boxes = np.asarray(anchor_boxes)
    gt_info = np.asarray(gt_info)

    #Primary positives: for every ground truth bounding box (column of iou), the anchor box (row) with
    #the highest IoU. Entry k of the index vector is paired with row k of the ground truth rows.
    positive_indices = np.argmax(iou,axis=0)
    positive_gt_info = gt_info

    #Secondary positives: every (anchor box, ground truth) pair whose IoU exceeds the threshold
    if iou_threshold < 1:
        above_threshold = np.argwhere(iou > iou_threshold)

        if above_threshold.size > 0:
            secondary_indices = above_threshold[:,0]
            secondary_gt_info = gt_info[above_threshold[:,1]]

            #Append the secondary pairs after the primary ones, keeping indices and ground truth rows
            #aligned entry by entry
            positive_indices = np.concatenate([positive_indices,secondary_indices],axis=0)
            positive_gt_info = np.concatenate([gt_info,secondary_gt_info],axis=0)

    #Every anchor box starts out as background (column 0); positives lose that flag and receive the
    #class of their ground truth bounding box instead
    positive_anchor_boxes_categories = np.zeros((iou.shape[0],num_unique_categories))
    positive_anchor_boxes_categories[:,0] = 1
    positive_anchor_boxes_categories[positive_indices,0] = 0

    #Class labels live in a (usually float) coordinate array, and NumPy only accepts integer indices
    category_indices = positive_gt_info[:,4].astype(int)
    positive_anchor_boxes_categories[positive_indices,category_indices] = 1

    positive_anchor_boxes_offsets = np.zeros((iou.shape[0],4))

    #The index vector stays 1-D so that anchor_boxes[positive_indices] has one row per pair and lines
    #up with positive_gt_info
    if is_normalize:

        gt_centers = _corner_rows_to_center_rows(positive_gt_info)
        anchor_centers = _corner_rows_to_center_rows(anchor_boxes)[positive_indices]

        #Centre offsets are expressed relative to the anchor box size, then divided by 0.1
        offsets_xy = gt_centers[:,0:2] - anchor_centers[:,0:2]
        offsets_xy = offsets_xy/anchor_centers[:,2:4]
        offsets_xy = offsets_xy/0.1

        #Size offsets are the log of the size ratio, then divided by 0.2
        offsets_wh = np.log(gt_centers[:,2:4]/anchor_centers[:,2:4])
        offsets_wh = offsets_wh/0.2

        offsets = np.concatenate([offsets_xy,offsets_wh],axis=1)

    else:

        offsets = positive_gt_info[:,0:4] - anchor_boxes[positive_indices]

    #NOTE: an anchor box paired with several ground truth bounding boxes appears several times in
    #positive_indices; the offsets of its last pair are the ones kept by this assignment, while its
    #category row keeps a 1 for every paired class
    positive_anchor_boxes_offsets[positive_indices] = offsets

    return positive_anchor_boxes_categories,positive_anchor_boxes_offsets