In [1]:
import math

In [2]:
"""
The YOLO-loss function calculates the loss for a mini-batch of predictions (made by the YOLO algorithm)
and the corresponding ground-truth labels
Parameters:
    predictions (tensor): A tensor containing a mini-batch of prediction tensors by the YOLO algorithm
    targets (tensor): A tensor containing a mini-batch of ground-truth labels for the given predictions
    split_size (int): Dimension of the grid which is applied to the image
    num_boxes (int): Amount of bounding boxes which are predicted by the YOLO algorithm
    num_classes (int): Amount of classes which are being predicted
    lambda_coord (float): Hyperparameter for controlling the bounding box loss
    lambda_noobj (float): Hyperparameter for controlling the confidence loss for non objects
Returns:
    final_loss (float): The value calculated by the loss function for the given parameters
"""
def YOLO_Loss(predictions, targets, split_size, num_boxes, num_classes, lambda_coord, lambda_noobj):
    """Compute the summed YOLO loss of a mini-batch.

    Both inputs are indexed as ``[sample, channel, cell_row, cell_col]``.

    Channel layout read from ``targets``:
        0      object indicator (a cell holds an object when the value is 1)
        1..4   x, y, width, height of the ground-truth box
        5+c    score of class ``c``

    Channel layout read from ``predictions``:
        5*b        confidence of box ``b``
        5*b+1..4   x, y, width, height of box ``b``
        5*num_boxes+c   score of class ``c``

    Parameters:
        predictions (tensor): Mini-batch of prediction tensors.
        targets (tensor): Mini-batch of ground-truth labels.
        split_size (int): Number of grid cells per image side.
        num_boxes (int): Number of boxes predicted per cell.
        num_classes (int): Number of class scores per cell.
        lambda_coord (float): Weight of the centre and width/height losses.
        lambda_noobj (float): Weight of the confidence loss of empty cells.

    Returns:
        final_loss: Sum over all samples of
        ``lambda_coord * (mid_loss + dim_loss) + lambda_noobj * conf_loss_noobj
        + conf_loss + class_loss``.

    Side effects:
        Sets the module-level global ``cell_dim`` to ``int(448 / split_size)``;
        ``MidtoCorner`` reads that global when converting boxes.
    """
    final_loss = 0  # Accumulates the loss over the whole batch

    # MidtoCorner reads the cell size from this global, so it has to be set
    # before any box is converted. The image side is hard-coded to 448 pixels.
    global cell_dim
    cell_dim = int(448 / split_size)

    for sample in range(predictions.shape[0]):
        mid_loss = 0  # Loss of the centre coordinates
        dim_loss = 0  # Loss of the width and height values
        conf_loss = 0  # Loss of the confidence score
        conf_loss_noobj = 0  # Confidence loss of cells without an object
        class_loss = 0  # Loss of the class scores

        for cell_h in range(split_size):
            for cell_w in range(split_size):
                if targets[sample, 0, cell_h, cell_w] != 1:
                    ####################################################
                    # Cell without an object: confidence penalty only  #
                    ####################################################
                    # Only the box with the highest confidence is penalised.
                    # Box 0 is used when no confidence is greater than 0.
                    best_box = 0
                    max_conf = 0
                    for box in range(num_boxes):
                        box_conf = predictions[sample, box * 5, cell_h, cell_w]
                        if box_conf > max_conf:
                            max_conf = box_conf
                            best_box = box
                    conf_loss_noobj += (0 - predictions[sample, best_box * 5, cell_h, cell_w]) ** 2
                    continue

                ########################################################
                # Cell with an object: box, confidence and class loss  #
                ########################################################
                # The ground-truth box is the same for every predicted box,
                # so it is converted to the corner format only once.
                t_box_coords = MidtoCorner(targets[sample, 1:5, cell_h, cell_w], cell_h, cell_w)

                # The predicted box with the highest IoU is the one that is
                # compared against the ground truth. Box 0 is used when no
                # IoU is greater than 0.
                best_box = 0
                max_iou = 0
                for box in range(num_boxes):
                    p_box_coords = MidtoCorner(
                        predictions[sample, 1 + box * 5:5 + box * 5, cell_h, cell_w],
                        cell_h,
                        cell_w,
                    )
                    box_score = IoU(t_box_coords, p_box_coords)
                    if box_score > max_iou:
                        max_iou = box_score
                        best_box = box

                offset = best_box * 5  # First channel of the selected box

                # Loss of the centre coordinates
                x_loss = (targets[sample, 1, cell_h, cell_w] - predictions[sample, 1 + offset, cell_h, cell_w]) ** 2
                y_loss = (targets[sample, 2, cell_h, cell_w] - predictions[sample, 2 + offset, cell_h, cell_w]) ** 2
                mid_loss += x_loss + y_loss

                # Loss of the width and height values (on their square roots).
                # NOTE(review): math.sqrt returns a plain Python float and
                # raises ValueError for negative input. If the inputs are
                # autograd tensors this term presumably carries no gradient;
                # confirm whether that is intended.
                w_loss = (math.sqrt(targets[sample, 3, cell_h, cell_w]) - math.sqrt(predictions[sample, 3 + offset, cell_h, cell_w])) ** 2
                h_loss = (math.sqrt(targets[sample, 4, cell_h, cell_w]) - math.sqrt(predictions[sample, 4 + offset, cell_h, cell_w])) ** 2
                dim_loss += w_loss + h_loss

                # Loss of the confidence score of the selected box
                conf_loss += (1 - predictions[sample, offset, cell_h, cell_w]) ** 2

                # Loss of the class scores
                for c in range(num_classes):
                    class_loss += (targets[sample, 5 + c, cell_h, cell_w] - predictions[sample, 5 * num_boxes + c, cell_h, cell_w]) ** 2

        # Weight the partial losses of this sample and add them to the total
        final_loss += lambda_coord * mid_loss + lambda_coord * dim_loss + lambda_noobj * conf_loss_noobj + conf_loss + class_loss

    return final_loss

        
"""
Calculates the Intersection over Union of two bounding boxes
Parameters:
    target (list): A list with bounding box coordinates in the corner format
    prediction (list): A list with bounding box coordinates in the corner format
Returns:
    iou_value (float): The score of the IoU over the two boxes
"""
def IoU(target, prediction):
    """Return the Intersection over Union of two boxes in the corner format.

    Parameters:
        target (list): Box as ``[x1, y1, x2, y2]``.
        prediction (list): Box as ``[x1, y1, x2, y2]``.

    Returns:
        iou_value (float): Intersection area divided by union area, or 0.0
        when the union area is not positive (e.g. both boxes have zero area).
    """
    # Corner coordinates of the overlapping rectangle
    i_x1 = max(target[0], prediction[0])
    i_y1 = max(target[1], prediction[1])
    i_x2 = min(target[2], prediction[2])
    i_y2 = min(target[3], prediction[3])

    # A negative extent means the boxes do not overlap on that axis
    intersection = max(0, (i_x2 - i_x1)) * max(0, (i_y2 - i_y1))

    target_area = (target[2] - target[0]) * (target[3] - target[1])
    prediction_area = (prediction[2] - prediction[0]) * (prediction[3] - prediction[1])
    union = target_area + prediction_area - intersection

    # Degenerate boxes give an empty union; avoid dividing by zero
    if union <= 0:
        return 0.0

    iou_value = intersection / union
    return iou_value


"""
Transforms bounding box coordinates which are in the mid YOLO format into the common corner format
with the correct pixel distance
Parameters:
    mid_box (list): bounding box coordinates which are in the mid YOLO format
    cell_h (int): height index of the cell with the bounding box
    cell_w (int): width index of the cell with the bounding box
Returns:
    corner_box (list): A list containing the coordinates of the bounding box in the common
    corner format
"""
def MidtoCorner(mid_box, cell_h, cell_w):
    """Convert a cell-relative YOLO box into pixel corner coordinates.

    The centre is given relative to the cell at ``(cell_h, cell_w)`` and is
    scaled by the module-level global ``cell_dim`` (set by ``YOLO_Loss``).
    Width and height are scaled by the hard-coded image side of 448 pixels.

    Parameters:
        mid_box (list): ``[x, y, width, height]`` in the mid YOLO format.
        cell_h (int): Row index of the cell that holds the box.
        cell_w (int): Column index of the cell that holds the box.

    Returns:
        corner_box (list): ``[x1, y1, x2, y2]``, each truncated with ``int``.
    """
    rel_x, rel_y = mid_box[0], mid_box[1]
    rel_w, rel_h = mid_box[2], mid_box[3]

    # Centre of the box in pixels: offset inside the cell plus cell origin
    mid_x = rel_x * cell_dim + cell_dim * cell_w
    mid_y = rel_y * cell_dim + cell_dim * cell_h

    # Half of the pixel extent on each axis
    half_w = rel_w * 448 / 2
    half_h = rel_h * 448 / 2

    return [
        int(mid_x - half_w),
        int(mid_y - half_h),
        int(mid_x + half_w),
        int(mid_y + half_h),
    ]