In [1]:
import pandas as pd

# Load the per-image ground-truth / prediction table used by every cell below.
df = pd.read_parquet(path='ground_truth_preds.parquet')

In [2]:
# Show the loaded frame to check its columns and row count.
df

Unnamed: 0,filename,gt_bbox,gt_category_id,pred_bbox,pred_conf,pred_category_id
0,000000000025.jpg,"[[385.53, 60.03, 214.97, 297.16], [53.01, 356....","[23, 23]","[[383.285, 63.889999999999986, 216.63, 294.06]...","[0.9051, 0.4291]","[23, 23]"
1,000000000030.jpg,"[[204.86, 31.02, 254.88, 324.12], [237.56, 155...","[58, 75]","[[239.97500000000002, 155.625, 167.49, 195.83]...","[0.9049, 0.2566]","[75, 58]"
2,000000000036.jpg,"[[0.0, 50.12, 457.68, 430.35], [167.58, 162.89...","[25, 0]","[[174.31000000000003, 157.92, 285.84, 481.92],...","[0.9115, 0.7233]","[0, 25]"
3,000000000061.jpg,"[[261.2, 205.92, 48.08, 57.24], [393.39, 210.3...","[0, 0, 20, 20, 0]","[[222.41499999999996, 239.29999999999998, 145....","[0.5402, 0.4533]","[19, 0]"
4,000000000109.jpg,"[[539.72, 295.45, 22.91, 18.39], [512.89, 271....","[16, 0, 0, 0, 13, 13, 0, 0]","[[513.9350000000001, 271.4, 16.51, 38.58], [58...","[0.7398, 0.6102, 0.261, 0.2607]","[0, 0, 8, 0]"
...,...,...,...,...,...,...
24995,000000581863.jpg,"[[1.66, 589.63, 113.04, 41.56]]",[61],"[[0.32499999999999574, 587.335, 111.59, 52.29]]",[0.6729],[61]
24996,000000581881.jpg,"[[309.79, 229.12, 41.88, 67.3], [330.37, 217.2...","[18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1...","[[369.185, 149.315, 59.95, 185.21], [197.45000...","[0.9141, 0.7886, 0.7456, 0.7004, 0.6893, 0.633...","[0, 18, 18, 2, 18, 18, 18, 0, 18]"
24997,000000581900.jpg,"[[81.01, 193.51, 26.01, 43.08], [35.18, 195.71...","[56, 56, 3, 0, 0, 0, 26, 0, 74, 0, 0, 28]","[[316.955, 145.91000000000003, 23.51, 70.84], ...","[0.7873, 0.6988, 0.6745, 0.6387, 0.5019, 0.476...","[0, 0, 0, 56, 3, 0, 74, 0, 0, 56, 74]"
24998,000000581903.jpg,"[[359.77, 298.7, 59.73, 161.69]]",[39],"[[360.755, 295.855, 58.05, 163.77], [296.51, 2...","[0.7886, 0.6118, 0.5375, 0.4623, 0.4502, 0.349...","[39, 39, 39, 41, 39, 2, 39]"


## IoU and per-image loss

In [3]:
import numpy as np

def calculate_iou(bbox1, bbox2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        bbox1: Sequence ``(x, y, w, h)``; ``(x, y)`` is the corner the box
            extends from and ``w`` / ``h`` are its extents along each axis.
        bbox2: Second box, same layout as ``bbox1``.

    Returns:
        The ratio of the overlap area to the union area. ``0.0`` is returned
        whenever the boxes share no region of positive area, which includes
        boxes that only touch along an edge and degenerate (zero-size) boxes.
    """
    x1, y1, w1, h1 = bbox1
    x2, y2, w2, h2 = bbox2

    # Extent of the overlap rectangle along each axis (non-positive => no overlap).
    overlap_w = min(x1 + w1, x2 + w2) - max(x1, x2)
    overlap_h = min(y1 + h1, y2 + h2) - max(y1, y2)

    # Bail out before dividing: a strictly positive overlap implies both boxes
    # have positive area, so the union below can never be zero.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    intersection_area = overlap_w * overlap_h
    union_area = w1 * h1 + w2 * h2 - intersection_area

    return intersection_area / union_area


def calculate_loss(row, iou_threshold=0.5):
    """Compute a heuristic per-image loss from one row of the predictions frame.

    The result is the sum of three terms:

    * localization: summed absolute (L1) difference between the coordinates
      of ground-truth and predicted boxes;
    * classification: ``-log(score)`` where the labels agree and
      ``-log(1 - score)`` where they differ;
    * IoU: each predicted box is scored by its best IoU against any
      ground-truth box, as ``-log(iou)`` at or above ``iou_threshold`` and
      ``-log(1 - iou)`` below it.

    The first two terms pair ground truth and predictions by position
    (``zip``): no matching is performed and surplus entries on the longer
    side are ignored.

    Args:
        row: Mapping-like object providing ``gt_bbox``, ``gt_category_id``,
            ``pred_bbox``, ``pred_category_id`` and ``pred_conf``.
        iou_threshold: IoU at or above which a predicted box is scored with
            ``-log(iou)`` instead of ``-log(1 - iou)``.

    Returns:
        The summed loss. Log arguments are clamped away from zero, so scores
        or IoUs of exactly 0 or 1 give a large finite value instead of ``inf``.
    """
    gt_boxes = row['gt_bbox']
    gt_labels = row['gt_category_id']
    pred_boxes = row['pred_bbox']
    pred_labels = row['pred_category_id']
    pred_scores = row['pred_conf']

    eps = 1e-12  # smallest value ever passed to log

    def _neg_log(p):
        # Clamp into [eps, 1] so log never sees 0 (inf) or a negative number (nan).
        return -np.log(np.clip(p, eps, 1.0))

    # Localization term: plain L1 distance between positionally paired boxes.
    localization_loss = 0.0
    for gt_box, pred_box in zip(gt_boxes, pred_boxes):
        localization_loss += np.sum(np.abs(np.array(gt_box) - np.array(pred_box)))

    # Classification term: penalize by the confidence assigned to the outcome.
    classification_loss = 0.0
    for gt_label, pred_label, pred_score in zip(gt_labels, pred_labels, pred_scores):
        p_outcome = pred_score if gt_label == pred_label else 1 - pred_score
        classification_loss += _neg_log(p_outcome)

    # IoU term: score every predicted box by its best overlap with any ground truth.
    # NOTE(review): below the threshold the penalty -log(1 - iou) grows with IoU,
    # so a prediction with no overlap at all contributes nothing -- confirm intended.
    iou_loss = 0.0
    for pred_box in pred_boxes:
        best_iou = 0.0
        for gt_box in gt_boxes:
            best_iou = max(best_iou, calculate_iou(gt_box, pred_box))
        if best_iou >= iou_threshold:
            iou_loss += _neg_log(best_iou)
        else:
            iou_loss += _neg_log(1 - best_iou)

    total_loss = localization_loss + classification_loss + iou_loss
    return total_loss

In [4]:
# Score every image: axis=1 hands calculate_loss one row at a time.
df['total_loss'] = df.apply(func=calculate_loss, axis=1)
# To rank images by loss afterwards: df.sort_values('total_loss', ascending=False)
df

Unnamed: 0,filename,gt_bbox,gt_category_id,pred_bbox,pred_conf,pred_category_id,total_loss
0,000000000025.jpg,"[[385.53, 60.03, 214.97, 297.16], [53.01, 356....","[23, 23]","[[383.285, 63.889999999999986, 216.63, 294.06]...","[0.9051, 0.4291]","[23, 23]",28.349750
1,000000000030.jpg,"[[204.86, 31.02, 254.88, 324.12], [237.56, 155...","[58, 75]","[[239.97500000000002, 155.625, 167.49, 195.83]...","[0.9049, 0.2566]","[75, 58]",745.342433
2,000000000036.jpg,"[[0.0, 50.12, 457.68, 430.35], [167.58, 162.89...","[25, 0]","[[174.31000000000003, 157.92, 285.84, 481.92],...","[0.9115, 0.7233]","[0, 25]",958.966831
3,000000000061.jpg,"[[261.2, 205.92, 48.08, 57.24], [393.39, 210.3...","[0, 0, 20, 20, 0]","[[222.41499999999996, 239.29999999999998, 145....","[0.5402, 0.4533]","[19, 0]",399.165001
4,000000000109.jpg,"[[539.72, 295.45, 22.91, 18.39], [512.89, 271....","[16, 0, 0, 0, 13, 13, 0, 0]","[[513.9350000000001, 271.4, 16.51, 38.58], [58...","[0.7398, 0.6102, 0.261, 0.2607]","[0, 0, 8, 0]",1095.739422
...,...,...,...,...,...,...,...
24995,000000581863.jpg,"[[1.66, 589.63, 113.04, 41.56]]",[61],"[[0.32499999999999574, 587.335, 111.59, 52.29]]",[0.6729],[61],16.467503
24996,000000581881.jpg,"[[309.79, 229.12, 41.88, 67.3], [330.37, 217.2...","[18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1...","[[369.185, 149.315, 59.95, 185.21], [197.45000...","[0.9141, 0.7886, 0.7456, 0.7004, 0.6893, 0.633...","[0, 18, 18, 2, 18, 18, 18, 0, 18]",1385.753266
24997,000000581900.jpg,"[[81.01, 193.51, 26.01, 43.08], [35.18, 195.71...","[56, 56, 3, 0, 0, 0, 26, 0, 74, 0, 0, 28]","[[316.955, 145.91000000000003, 23.51, 70.84], ...","[0.7873, 0.6988, 0.6745, 0.6387, 0.5019, 0.476...","[0, 0, 0, 56, 3, 0, 74, 0, 0, 56, 74]",2829.141351
24998,000000581903.jpg,"[[359.77, 298.7, 59.73, 161.69]]",[39],"[[360.755, 295.855, 58.05, 163.77], [296.51, 2...","[0.7886, 0.6118, 0.5375, 0.4623, 0.4502, 0.349...","[39, 39, 39, 41, 39, 2, 39]",7.877724


## Plot the top-loss images

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
from PIL import Image

img_folder_path = '/workspace/datasets/coco_minitrain_25k/images/train2017/'


def _draw_boxes(ax, boxes, captions, color, legend_label):
    """Outline each (x, y, w, h) box on ``ax`` and caption it at its (x, y) corner."""
    for (x, y, w, h), caption in zip(boxes, captions):
        rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor=color, facecolor='none', label=legend_label)
        ax.add_patch(rect)
        ax.text(x, y, caption, color=color, fontsize=12, verticalalignment='top', bbox=dict(facecolor='white', alpha=0.5))


# Rank by loss first so the sample really comes from the 100 worst images;
# df.tail(100) on the unsorted frame would just be the last 100 files.
worst_rows = df.sort_values('total_loss', ascending=False).head(100)

# min(...) keeps sample() from raising when the frame has fewer than 10 rows.
for _, row in worst_rows.sample(min(10, len(worst_rows))).iterrows():
    filename = row['filename']
    gt_bboxes = row['gt_bbox']
    pred_bboxes = row['pred_bbox']

    # Captions: category id for ground truth, "id: confidence" for predictions.
    gt_captions = [str(category_id) for category_id in row['gt_category_id']]
    pred_captions = [
        f'{category_id}: {conf:.2f}'
        for conf, category_id in zip(row['pred_conf'], row['pred_category_id'])
    ]

    # The context manager closes the image file once the figure has been rendered.
    with Image.open(img_folder_path + filename) as image:
        # Three panels: overlay, ground truth only, predictions only.
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        for ax in axes:
            ax.imshow(image)
            ax.axis('off')

        _draw_boxes(axes[0], gt_bboxes, gt_captions, 'g', 'Ground Truth')
        _draw_boxes(axes[0], pred_bboxes, pred_captions, 'r', 'Prediction')
        axes[0].set_title(f"{filename} (loss {row['total_loss']:.1f})")

        _draw_boxes(axes[1], gt_bboxes, gt_captions, 'g', 'Ground Truth')
        axes[1].set_title('Ground Truth')

        _draw_boxes(axes[2], pred_bboxes, pred_captions, 'r', 'Prediction')
        axes[2].set_title('Prediction')

        # Adjust spacing between subplots, then render.
        plt.tight_layout()
        plt.show()

        # Release the figure so a loop over many images does not accumulate them.
        plt.close(fig)

In [None]:
# df_sorted is not created by any earlier cell, so build it here:
# images ranked by total loss, worst first.
df_sorted = df.sort_values('total_loss', ascending=False)
df_sorted.head(10)