## Imports

In [1]:
import os
import shutil
import math
import heapq
import numpy as np
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt
from collections import defaultdict

## Settings

In [2]:
# --- Selection knobs -------------------------------------------------------
num_lowest = 10500    # cap on how many of the worst-scoring images are kept
fp_threshold = 0.86   # confidence below this (with iou > 0.5) costs a black point
fn_threshold = 0.83   # confidence above this (with iou < 0.5) costs a black point

# --- Input / output locations ----------------------------------------------
scores_file_path = './overlap_scores.txt'
labels_dir = './labels'
src_dir = './images'
dst_dir = './lowest'

# --- Field names of the parsed records -------------------------------------
# Eight coordinate fields, 'v1' .. 'v8', shared by score and label records.
vertices = [f'v{n}' for n in range(1, 9)]
# A score record is: class, iou, confidence, then the eight coordinates.
score_param_names = ['class', 'iou', 'confidence'] + vertices
# A label record is: the eight coordinates, then the class.
label_param_names = vertices + ['class']

# --- Per-image tallies (keyed by image name, missing keys read as 0) -------
# 'blacks' is the total number of penalty ("black") points of an image; the
# other counters break that total down by cause.
blacks, gap, fp, fn, clserr, low_iou, low_conf = (defaultdict(int) for _ in range(7))

# --- Per-image parsed records (keyed by image name) ------------------------
image_scores = defaultdict(list)
image_labels = defaultdict(list)

## Utility function to display an image

In [3]:
def display(img, title):
    """Show an image with a title using matplotlib's pyplot interface.

    Parameters
    ----------
    img
        Image data, handed unchanged to ``plt.imshow``.
    title
        Text handed unchanged to ``plt.title``.

    Notes
    -----
    NOTE(review): ``plt.imshow`` renders 3-channel arrays as RGB while
    ``cv2.imread`` yields BGR - presumably callers convert first; confirm.
    NOTE(review): inside a notebook this name shadows IPython's own
    ``display`` helper.
    """
    plt.imshow(img)
    plt.title(title)
    # Render the figure immediately.
    plt.show()

## Utility function to calculate the IOU of 2 oriented bounding boxes (courtesy ChatGPT)

In [4]:
def calc_iou(box1, box2, canvas_size=(1500, 1500)):
    """Intersection-over-union of two polygons, measured by rasterisation.

    Both polygons are filled onto separate binary masks; the IoU is the
    number of pixels set in both masks divided by the number of pixels set
    in at least one of them.

    Parameters
    ----------
    box1, box2
        Polygon vertices as array-likes accepted by ``cv2.fillPoly`` once
        converted to int32, e.g. shape ``(4, 1, 2)`` or ``(4, 2)`` holding
        ``(x, y)`` pairs in pixels.
    canvas_size
        ``(rows, cols)`` of the rasterisation canvas. Any part of a polygon
        lying outside the canvas is not counted, so the canvas must be at
        least as large as the images the boxes come from. Defaults to the
        previously hard-coded 1500 x 1500.

    Returns
    -------
    float
        IoU in ``[0, 1]``; ``0.0`` when neither polygon covers any pixel of
        the canvas (instead of dividing by zero).
    """
    # cv2.fillPoly only accepts 32-bit integer points, whereas np.array() of
    # Python ints is int64 on most platforms - normalise here.
    poly1 = np.asarray(box1, dtype=np.int32)
    poly2 = np.asarray(box2, dtype=np.int32)

    mask1 = np.zeros(canvas_size, dtype=np.uint8)
    mask2 = np.zeros(canvas_size, dtype=np.uint8)
    cv2.fillPoly(mask1, [poly1], 255)
    cv2.fillPoly(mask2, [poly2], 255)

    uarea = cv2.countNonZero(cv2.bitwise_or(mask1, mask2))
    if uarea == 0:
        # Degenerate or completely off-canvas polygons: no overlap to speak of.
        return 0.0
    iarea = cv2.countNonZero(cv2.bitwise_and(mask1, mask2))

    return iarea / uarea

## Build dict with scores of each image

In [5]:
# Parse the scores file into image_scores. Every non-empty line is expected
# to read
#     <image name> <class> <iou> <confidence> <v1> ... <v8>
# and becomes one dict (keyed by score_param_names, values kept as strings)
# appended to the list of its image.
image_scores.clear()  # keep the cell idempotent: re-running must not duplicate entries
with open(scores_file_path, 'r') as scores_file:
    for scores in scores_file:
        # split() without an argument tolerates tabs and repeated blanks.
        vals = scores.split()
        if not vals:
            # A blank line would otherwise register a bogus '' image holding
            # an empty score record.
            continue
        # zip() stops at the shorter sequence, so surplus columns are ignored
        # instead of raising IndexError.
        score_params = dict(zip(score_param_names, vals[1:]))
        image_scores[vals[0]].append(score_params)

## Build dict with labels of each image

In [6]:
# Parse every label file in labels_dir into image_labels. The image name is
# the file name up to its first '.'; every non-empty line contributes one
# dict (keyed by label_param_names, values kept as strings) built from its
# first nine whitespace-separated fields.
image_labels.clear()  # keep the cell idempotent: re-running must not duplicate entries
im_fnames = sorted(os.listdir(labels_dir))  # sorted for a reproducible order
for im_fname in im_fnames:
    label_file_name = os.path.join(labels_dir, im_fname)
    # os.listdir also returns directories and hidden entries (for example
    # Jupyter's .ipynb_checkpoints); those are not label files.
    if im_fname.startswith('.') or not os.path.isfile(label_file_name):
        continue
    imname = im_fname.split('.')[0]
    with open(label_file_name, 'r') as labels_file:
        for labels in labels_file:
            # split() without an argument tolerates tabs and repeated blanks.
            vals = labels.split()
            if not vals:
                # A blank line is not an object; counting it would inflate
                # the number of labelled objects of the image.
                continue
            # zip() stops after the nine names, so trailing columns are ignored.
            label_params = dict(zip(label_param_names, vals))
            image_labels[imname].append(label_params)

## Calculate black-points of every image

In [7]:
def _obb_polygon(obj):
    """Return the 'v1'..'v8' fields of a record as an int32 array of shape (4, 1, 2).

    Coordinates go through float() first so that both '12' and '12.7' parse;
    the fractional part is truncated.
    """
    coords = [int(float(obj[f'v{k}'])) for k in range(1, 9)]
    return np.array(coords, dtype=np.int32).reshape(-1, 1, 2)


# IoU value separating the "low confidence" check from the "low IoU" check.
iou_cutoff = 0.5

imnames = list(image_scores.keys())

for imname in tqdm(imnames):
    detections = image_scores[imname]
    truths = image_labels[imname]

    # Black points for the difference between the number of scored and
    # labelled objects. The counters are assigned (not accumulated) so that
    # re-running this cell does not double count.
    score_objects = len(detections)
    label_objects = len(truths)
    blacks[imname] = abs(score_objects - label_objects)
    gap[imname] = abs(score_objects - label_objects)
    fp[imname] = max(score_objects - label_objects, 0)
    fn[imname] = max(label_objects - score_objects, 0)
    clserr[imname] = 0
    low_conf[imname] = 0
    low_iou[imname] = 0

    # For every scored object find the labelled object it overlaps most.
    # A scored object that overlaps nothing keeps max_j == -1.
    label_polygons = [_obb_polygon(truth) for truth in truths]
    candidates = []
    for i, detection in enumerate(detections):
        score_polygon = _obb_polygon(detection)  # invariant over the inner loop
        max_iou = 0.
        max_j = -1
        for j, label_polygon in enumerate(label_polygons):
            iou = calc_iou(score_polygon, label_polygon)
            if iou > max_iou:
                max_iou = iou
                max_j = j
        candidates.append((max_iou, i, max_j))
    # Keep at most one candidate per labelled object, best overlaps first.
    # Note that several scored objects may still point at the same label.
    matching = heapq.nlargest(label_objects, candidates)

    # Add black points for matched objects with mis-aligned classes or
    # mis-aligned iou/confidence.
    for best_iou, i, j in matching:
        detection = detections[i]

        # Class mismatch between the scored object and its labelled match.
        # Skipped when there is no match: j == -1 would silently index the
        # LAST label and report a spurious class error.
        if j >= 0 and detection['class'] != truths[j]['class']:
            blacks[imname] += 1
            clserr[imname] += 1

        confidence = float(detection['confidence'])
        reported_iou = float(detection['iou'])  # iou as given in the scores file

        # Good overlap but the confidence is below fp_threshold.
        if confidence < fp_threshold and reported_iou > iou_cutoff:
            blacks[imname] += 1
            low_conf[imname] += 1

        # Confidence above fn_threshold but poor overlap.
        if confidence > fn_threshold and reported_iou < iou_cutoff:
            blacks[imname] += 1
            low_iou[imname] += 1

100%|██████████| 604/604 [00:46<00:00, 13.13it/s]


## Make a list of lowest-performing (= highest black-points scoring) images

In [8]:
# Rank the images from most to fewest black points, keep at most num_lowest
# of them, and drop the ones that collected no black points at all.
sorted_by_score = sorted(blacks.items(), key=lambda entry: entry[1], reverse=True)
lowest = [entry for entry in sorted_by_score[:num_lowest] if entry[1] > 0]

## Draw label & score bboxes/classes on image and store it

In [9]:
def _draw_obb(img, obj, text, color, text_dy=0):
    """Outline the quadrilateral of `obj` on `img` and write `text` at its centroid.

    Parameters
    ----------
    img : image array, modified in place.
    obj : record holding the coordinate fields 'v1' .. 'v8' (x1, y1, ..., x4, y4).
    text : caption to draw.
    color : colour tuple used for both the outline and the caption.
    text_dy : vertical offset in pixels added to the caption position.
    """
    xs = [float(obj[f'v{k}']) for k in (1, 3, 5, 7)]
    ys = [float(obj[f'v{k}']) for k in (2, 4, 6, 8)]
    # The int32 conversion truncates fractional coordinates.
    pts = np.array(list(zip(xs, ys)), dtype=np.int32)
    cv2.polylines(img, [pts], True, color, 2)
    anchor = (int(sum(xs) / 4), int(sum(ys) / 4) + text_dy)
    cv2.putText(img, text, anchor, cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)


debug_image = 'v4_frame_000669'
imnames = [entry[0] for entry in lowest]

# cv2.imwrite does not create missing directories; it just reports failure.
os.makedirs(dst_dir, exist_ok=True)

for imname in tqdm(imnames):
    fname = f'{imname}.png'
    src = os.path.join(src_dir, fname)
    # cv2.imread does not raise on a missing or unreadable file, it returns
    # None - so the result has to be checked explicitly.
    img = cv2.imread(src)
    if img is None:
        print(f'skipping {imname}: cannot read {src}')
        continue

    # Labelled objects: green outline, class name 30 px below the centroid
    # so it does not collide with the caption of a matching scored object.
    for label in image_labels[imname]:
        _draw_obb(img, label, label['class'], (0, 255, 0), text_dy=30)

    # Scored objects: red outline, "<class> <confidence>" at the centroid.
    detections = image_scores[imname]
    score_objects = len(detections)
    for i, detection in enumerate(detections):
        if imname == debug_image:
            print(f'score_objects: {score_objects}')
        confidence = float(detection["confidence"])
        object_info = f'{detection["class"]} {confidence:.1f}'
        if imname == debug_image:
            print(f'i: {i}, object_info: {object_info}')
        _draw_obb(img, detection, object_info, (0, 0, 255))

    # The file name carries the black-point total and its breakdown.
    dst_name = f'{imname}_{blacks[imname]}_fp_{fp[imname]}_fn_{fn[imname]}_clserr_{clserr[imname]}_low_conf_{low_conf[imname]}_low_iou_{low_iou[imname]}.jpg'
    dst = os.path.join(dst_dir, dst_name)
    if not cv2.imwrite(dst, img):
        print(f'failed to write {dst}')

  4%|▍         | 2/52 [00:00<00:03, 15.41it/s]

score_objects: 6
i: 0, object_info: person 1.0
score_objects: 6
i: 1, object_info: person 0.9
score_objects: 6
i: 2, object_info: pallet 1.0
score_objects: 6
i: 3, object_info: forklift 1.0
score_objects: 6
i: 4, object_info: forklift 1.0
score_objects: 6
i: 5, object_info: forklift 0.8


100%|██████████| 52/52 [00:02<00:00, 18.86it/s]


In [10]:
# Inspect the selected (image name, black points) pairs, highest count first.
lowest

[('v4_frame_000669', 2),
 ('v2_frame_005710', 2),
 ('v2_frame_004890', 2),
 ('v2_frame_004510', 2),
 ('v4_frame_000170', 1),
 ('v4_frame_000096', 1),
 ('v2_frame_003590', 1),
 ('v5_frame_000125', 1),
 ('v4_frame_000667', 1),
 ('v2_frame_000260', 1),
 ('v2_frame_003220', 1),
 ('v5_frame_000127', 1),
 ('v5_frame_000138', 1),
 ('v2_frame_003200', 1),
 ('v2_frame_003880', 1),
 ('v4_frame_000188', 1),
 ('v2_frame_002300', 1),
 ('v2_frame_003920', 1),
 ('v2_frame_004210', 1),
 ('v2_frame_000480', 1),
 ('v2_frame_004500', 1),
 ('v2_frame_004880', 1),
 ('v2_frame_004260', 1),
 ('v2_frame_004090', 1),
 ('v2_frame_004340', 1),
 ('v2_frame_001970', 1),
 ('v2_frame_005120', 1),
 ('v1_frame_000243', 1),
 ('v2_frame_000000', 1),
 ('v2_frame_000130', 1),
 ('v2_frame_004740', 1),
 ('v2_frame_004820', 1),
 ('v2_frame_004750', 1),
 ('v2_frame_005660', 1),
 ('v2_frame_002710', 1),
 ('v2_frame_004240', 1),
 ('v2_frame_004560', 1),
 ('v2_frame_004520', 1),
 ('v2_frame_005020', 1),
 ('v2_frame_004720', 1),
