In [None]:
import numpy as np
from scipy.optimize import linear_sum_assignment

In [None]:
def stabilize(confidence, very_small_value=1e-8, dtype=np.float32):
    """
    Clamp confidence values so that taking their logarithm is safe.

    confidence is an array (or array-like) of confidence values
    very_small_value is the replacement for every entry that is <= 0.0
    dtype is the dtype of the returned array

    Entries greater than 1.0 become 1.0 and entries less than or equal to 0.0
    become very_small_value; all other entries (including NaN, for which both
    comparisons are False) are passed through unchanged.

    Returns a new array of the requested dtype. The input is never modified.
    """
    # Work on a private float64 copy:
    #   - the caller's array must not be clobbered by the masked assignments
    #   - an integer input would otherwise truncate very_small_value to 0
    values = np.array(confidence, dtype=np.float64)
    values[values > 1.0] = 1.0
    values[values <= 0.0] = very_small_value
    return values.astype(dtype)

In [None]:

def loss(predictions, truths, alpha=1.0):
    """
    Assignment-based detection loss, summed over the batch.

    predictions is a float matrix with the dimensions [batch_size, num_predictions, 5]
        - the '5' represents x, y, height, width & confidence of each prediction of each batch item

    truths is a sequence of length batch_size; item i holds the ground truths of
    image i with the dimensions [num_ground_truths_i, 4]
        - the '4' represents x, y, height & width of each ground truth
        - num_ground_truths may differ between images, so truths may be a ragged
          nested list, a list of arrays or an object array; each item is
          converted to a float array on its own

    alpha weights the localization term (squared L2 distance between boxes)

    For every batch item:
        1. cost[p, t] = alpha * ||box_p - truth_t||^2 - log(confidence_p)
        2. predictions are assigned one-to-one to truths with
           scipy.optimize.linear_sum_assignment (minimum total cost)
        3. the item loss is the sum of cost[p, t] over the assigned pairs plus
           -log(1 - confidence_p) for every prediction left unassigned

    Batch items with no predictions or no ground truths are skipped.

    Returns the total loss as a Python float, or None when every batch item
    was skipped. The inputs are not modified.

    Raises AssertionError when the batch sizes of predictions and truths differ.
    """

    assert len(predictions) == len(truths), 'Batch size of predictions and ground truths do not match'

    the_loss = None

    for item_predictions, item_truths in zip(predictions, truths):

        # Per-item conversion: a single np.array(truths) call would fail on a
        # ragged batch. np.array copies, so the stabilized confidences written
        # below never leak back into the caller's data.
        preds = np.array(item_predictions, dtype=float)
        gts = np.asarray(item_truths, dtype=float)

        num_preds = len(preds)
        num_truths = len(gts)

        if num_preds == 0 or num_truths == 0:
            continue # no way to calculate loss for this batch item

        # For numerical stability: keep confidence and (1 - confidence) away
        # from zero so that both logarithms stay finite
        preds[:, 4] = stabilize(preds[:, 4])
        confidence = preds[:, 4]
        one_minus_confidence = stabilize(1.0 - confidence)

        # Create the cost matrix: predictions vs ground truths
        ######################################################

        # offsets[p, t, :] = box_p - truth_t, via broadcasting
        offsets = preds[:, np.newaxis, :4] - gts[np.newaxis, :, :]
        localization_cost = alpha * np.sum(offsets ** 2, axis=-1)

        # the confidence cost depends on the prediction only, so the column
        # vector is broadcast across all ground truths
        confidence_cost = -np.log(confidence)[:, np.newaxis]
        cost_matrix = localization_cost + confidence_cost

        # run the hungarian algorithm; (row_ind[k], col_ind[k]) is the k-th
        # assigned (prediction, truth) pair
        row_ind, col_ind = linear_sum_assignment(cost_matrix)

        # Calculate the loss
        ######################################################

        # Only the pairs that were actually assigned to each other contribute.
        # NOTE: the original author remarks that the paper uses a different
        # alpha value for this step; the same alpha is used here.
        matched_loss = cost_matrix[row_ind, col_ind].sum()

        # Each unassigned prediction is penalized exactly once
        is_unmatched = np.ones(num_preds, dtype=bool)
        is_unmatched[row_ind] = False
        unmatched_loss = -np.log(one_minus_confidence[is_unmatched]).sum()

        item_loss = float(matched_loss + unmatched_loss)
        the_loss = item_loss if the_loss is None else the_loss + item_loss

    return the_loss

In [None]:
# Single-image batch with two predictions and one ground truth:
# the first box is far from the truth with confidence 0.001, the second box
# coincides with the truth and has confidence 0.98.
p = np.array([
    [
        [10, 20, 30, 40, 0.001],
        [1, 2, 3, 4, .98],
    ],
])

g = np.array([
    [
        [1., 2, 3, 4.],
    ],
])

l = loss(predictions=p, truths=g)
print(l)

In [None]:
# Two-image batch, two predictions per image.
p = np.array([[[10, 20, 30, 40, 0.001], [1, 2, 3, 4, .98]], 
              [[1, 1, 1, 2, 0.9], [3, 4, 5, 6, .98]]])

# The images have a different number of ground truths (one and two), so the
# nested list is ragged. dtype=object is required here: without it NumPy >= 1.24
# raises ValueError instead of building an array of per-image lists.
g = np.array([[[1., 2, 3, 4.]], 
              [[1, 1, 1, 2], [3, 4, 5, 6]]], dtype=object)

l = loss(p, g)
print(l)

In [None]:
# Single-image batch where both predicted boxes coincide with a ground truth
# and both carry confidence 0.9.
p = np.array([
    [
        [1, 1, 1, 2, 0.9],
        [3, 4, 5, 6, .9],
    ],
])

g = np.array([
    [
        [1, 1, 1, 2],
        [3, 4, 5, 6],
    ],
])

l = loss(predictions=p, truths=g, alpha=1)
print(l)