## Imports

In [324]:
import os
import math
import heapq
from collections import defaultdict

## Settings

In [325]:
num_lowest = 500
scores_file_path = './overlap_scores.txt'
labels_dir = './labels'
score_param_names = ['class', 'iou', 'confidence']
vertices = [f'v{i}' for i in range(1, 9)]
score_param_names.extend(vertices)
label_param_names = [v for v in vertices]
label_param_names.extend(['class'])
blacks = defaultdict(int) # 'blacks' are negative scores
image_scores = defaultdict(list)
image_labels = defaultdict(list)

## Build dict with scores of each image

In [326]:
with open(scores_file_path, 'r') as scores_file:
    for scores in scores_file:
        score_params = {}
        vals = scores.strip().split(' ')
        for i, p in enumerate(vals[1:]):
            score_params[score_param_names[i]] = p
        image_scores[vals[0]].append(score_params)

## Build dict with labels of each image

In [327]:
im_fnames = os.listdir(labels_dir) 
for im_fname in im_fnames:
    imname = im_fname.split('.')[0]
    label_file_name = os.path.join(labels_dir, im_fname)
    with open(label_file_name, 'r') as labels_file:
        for labels in labels_file:
            label_params = {}
            vals = labels.strip().split(' ')
            for i, p in enumerate(vals[:9]):
                label_params[label_param_names[i]] = p
            image_labels[imname].append(label_params)

In [328]:
imnames = list(image_scores.keys())

for imname in imnames:
    score_objects = len(image_scores[imname])
    label_objects = len(image_labels[imname])
    blacks[imname] += abs(score_objects - label_objects)

## Utility function to find the center point of an obb

In [329]:
def find_center(vertices):
    size = len(vertices)
    vxy = ([[vertices[i], vertices[i+1]] for i in range(0, size, 2)])
    min_x = min([v[0] for v in vxy])
    max_x = max([v[0] for v in vxy])
    min_y = min([v[1] for v in vxy])
    max_y = max([v[1] for v in vxy])

    return [(min_x+max_x)/2, (min_y+max_y)/2]

## Utility function to find the euclidean distance between 2 points

In [330]:
def find_dist(c1, c2):
    return math.sqrt((c1[0]-c2[0])**2. + (c1[1]-c2[1])**2)

## Calculate black-points of every image

In [331]:
imnames = list(image_scores.keys())

for imname in imnames:
    # Assign a black_points for the difference in no. of found objects
    score_objects = len(image_scores[imname])
    label_objects = len(image_labels[imname])
    blacks[imname] += abs(score_objects - label_objects)
    
    # Find upper common denominator of objects in both score/label of the image
    common = min(score_objects, label_objects)
    
    # Find the nearest labeled object to every scored object
    # Add a black point if the classes of 2 mostly-aligned objects are different
    score_centers = []
    label_centers = []
    for i in range(score_objects):
        score_vertices = [int(image_scores[imname][i][f'v{j}']) for j in range(1, 9)]
        score_centers.append(find_center(score_vertices))
    for i in range(label_objects):
        label_vertices = [int(image_labels[imname][i][f'v{j}']) for j in range(1, 9)]
        label_centers.append(find_center(label_vertices)) 
    dists = []
    for i, score_center in enumerate(score_centers):
        for j, label_center in enumerate(label_centers):
            dist = find_dist(score_center, label_center)
            heapq.heappush(dists, (dist, i, j))
            
    smallest = heapq.nsmallest(common, dists)
    for s in smallest:
        i = s[1]
        j = s[2]
        if image_scores[imname][i]['class'] != image_labels[imname][j]['class']:
            blacks[imname] += 1
            
    # Add a black point in case confidence < 0.7 and iou > 0.5
    for i in range(score_objects):
        if float(image_scores[imname][i]['confidence']) < 0.7 and float(image_scores[imname][i]['iou']) > 0.5:
            blacks[imname] += 1
            
    # Add a black point in case confidence > 0.3 and iou < 0.5
    for i in range(score_objects):
        if float(image_scores[imname][i]['confidence']) > 0.3 and float(image_scores[imname][i]['iou']) < 0.5:
            blacks[imname] += 1

## Print the list of images sorted by black_points, in descending order

In [332]:
lowest = list(blacks.items())
lowest.sort(key=lambda x:x[1], reverse=True)
lowest

[('v2_frame_004820', 33),
 ('v2_frame_003970', 28),
 ('v2_frame_004210', 27),
 ('v2_frame_003590', 26),
 ('v2_frame_004230', 25),
 ('v2_frame_004260', 24),
 ('v4_frame_000348', 23),
 ('v2_frame_002300', 23),
 ('v2_frame_004090', 23),
 ('v2_frame_003860', 23),
 ('v2_frame_003430', 22),
 ('v2_frame_004960', 22),
 ('v2_frame_003980', 22),
 ('v2_frame_001560', 22),
 ('v2_frame_003820', 21),
 ('v2_frame_003880', 21),
 ('v2_frame_003920', 21),
 ('v2_frame_002610', 21),
 ('v2_frame_005020', 21),
 ('v2_frame_000380', 20),
 ('v2_frame_003470', 20),
 ('v2_frame_004340', 20),
 ('v2_frame_003460', 19),
 ('v2_frame_003490', 19),
 ('v2_frame_003380', 19),
 ('v4_frame_000184', 19),
 ('v2_frame_005060', 19),
 ('v5_frame_000191', 18),
 ('v2_frame_003220', 18),
 ('v2_frame_005120', 18),
 ('v2_frame_005710', 18),
 ('v2_frame_001970', 18),
 ('v2_frame_005660', 18),
 ('v2_frame_004890', 18),
 ('v2_frame_004740', 18),
 ('v2_frame_004720', 18),
 ('v4_frame_000558', 17),
 ('v4_frame_000556', 17),
 ('v2_frame_