This notebook loads pre-computed detections produced by _darknet_evaluation_main.py_ and _darknet_evaluation_post_inference.py_ and compares them to an annotation input file. For every annotated frame, the detections and ground-truth annotations are displayed, and detections are matched to annotations using a set overlap threshold (e.g. 5%, 10%).

In [1]:
import numpy as np
import pickle
import os
from os import listdir
from os.path import join
from pathlib import Path
import subprocess
import argparse
import time
import threading
import queue
import sys
import cv2
from scipy.spatial import distance

Define the model detection path, the annotation path, and the confidence and overlap thresholds:

In [2]:
# Pickled ground-truth annotations; loaded by process_detections().
annotation_file = "D:/BENCHMARK/EVALUATION/ANNOTATIONS_ALL.pkl"
# Directory that is scanned for the ".JPG" frames shown during the evaluation.
obj_path = "D:/BENCHMARK/REAL/all/data/obj"
# Pickled model detections to evaluate; passed to process_detections().
detection_file = "D:/BENCHMARK/OUTPUT/rc2/rc2_yolov4_array_HPC_new_20000.pkl"
# Detections whose confidence is not strictly greater than this value are discarded.
confidence_thresh = 0.6
# Maximum centre-to-centre distance for a detection to count as a match,
# expressed as a fraction of the frame size (0.05 = 5 %).
overlap_thresh = 0.05

In [3]:
def compare_points(gt, detection, max_dist=25):
    """Test whether a detection lies close enough to a ground-truth point.

    Args:
        gt: coordinates of the ground-truth point.
        detection: coordinates of the detected point (same length as ``gt``).
        max_dist: largest Euclidean distance that still counts as a match
            (inclusive).

    Returns:
        Tuple ``(match, px_distance)``: ``match`` is ``True`` when the points
        are at most ``max_dist`` apart, ``px_distance`` is their Euclidean
        distance.
    """
    px_distance = distance.euclidean(gt, detection)
    match = True if px_distance <= max_dist else False
    return match, px_distance


def compare_frame(frame_gt, frame_detections, max_dist=0.05, network_shape=(None, None), confidence=0, img=None):
    """Match the detections of one frame against its ground-truth points.

    A ground-truth point counts as detected when at least one retained
    detection lies within ``max_dist`` of it; a detection counts as a false
    positive when it lies within ``max_dist`` of no ground-truth point. No
    one-to-one assignment is enforced: a single detection may satisfy several
    ground-truth points and vice versa.

    Args:
        frame_gt: sequence of ground-truth entries; the first two elements of
            each entry are used as its (x, y) position.
        frame_detections: sequence of detections; element ``[1]`` is the
            confidence and the first two elements of ``[2]`` are the (x, y)
            centre. (Presumably darknet-style ``(label, confidence, bbox)``
            tuples - verify against the producer of the detection file.)
        max_dist: maximum distance (inclusive) for a match, in the units in
            which the points are compared.
        network_shape: ``(width, height)`` used to normalise detection
            coordinates before comparison. With the default ``(None, None)``
            detections are compared to the ground truth without rescaling.
        confidence: detections whose confidence is not strictly greater than
            this value are ignored.
        img: optional image to draw on in place. Retained detections are drawn
            as blue circles and matched ground-truth points as green circles
            of radius ``max_dist``. Nothing is drawn when ``img`` is ``None``.

    Returns:
        ``(n_ground_truth, missed_detections, false_positives,
        mean_detection_distance)``, followed by ``img`` as a fifth element
        when an image was supplied. ``mean_detection_distance`` is the mean,
        over matched ground-truth points, of the distance to the closest
        detection, or 0 when no distance was recorded.
    """
    # Strip away all sub-threshold detections.
    frame_detections = [det for det in frame_detections if det[1] > confidence]

    normalise = network_shape[0] is not None
    if normalise:
        # Bring detections into the same (relative) coordinates as the ground truth.
        det_points = [[det[2][0] / network_shape[0], det[2][1] / network_shape[1]]
                      for det in frame_detections]
        scale_x, scale_y = network_shape[0], network_shape[1]
    else:
        det_points = [det[2][0:2] for det in frame_detections]
        # Without a network shape the ground truth is drawn at its raw coordinates.
        scale_x, scale_y = 1, 1

    draw = img is not None
    if draw:
        # Draw every retained detection once, independent of the ground truth,
        # so that false positives are visible even in frames without annotations.
        for det in frame_detections:
            cv2.circle(img, (int(det[2][0]), int(det[2][1])), 10, (255, 0, 0), 2)

    # 1 = unmatched, 0 = matched
    matches_gt = np.ones(len(frame_gt))
    matches_det = np.ones(len(frame_detections))
    detection_distances = []

    for i, gt_entry in enumerate(frame_gt):
        min_dist = max_dist
        for j, det_point in enumerate(det_points):
            match, px_dist = compare_points(gt=gt_entry[0:2],
                                            detection=det_point,
                                            max_dist=max_dist)
            if not match:
                continue

            matches_gt[i] = 0
            matches_det[j] = 0
            if px_dist < min_dist:
                min_dist = px_dist

        if draw and matches_gt[i] == 0:
            cv2.circle(img,
                       (int(gt_entry[0] * scale_x), int(gt_entry[1] * scale_y)),
                       int(max_dist * scale_x),
                       (0, 255, 0),
                       2)

        # Only distances strictly below the threshold are recorded.
        if min_dist < max_dist:
            detection_distances.append(min_dist)

    missed_detections = int(np.sum(matches_gt))
    false_positives = int(np.sum(matches_det))

    if len(detection_distances) == 0:
        mean_detection_distance = 0
    else:
        mean_detection_distance = np.mean(np.array(detection_distances))

    if draw:
        return len(frame_gt), missed_detections, false_positives, mean_detection_distance, img
    return len(frame_gt), missed_detections, false_positives, mean_detection_distance

def process_detections(data, frames_per_dataset=1000):
    """Evaluate a pickled detection file against the pickled annotations.

    For every dataset in the annotation file, each frame is loaded, its
    detections are compared to the ground truth with ``compare_frame`` and the
    annotated frame is shown in an OpenCV window. Per-dataset totals are
    printed and collected.

    Relies on the notebook-level globals ``annotation_file``,
    ``overlap_thresh``, ``confidence_thresh`` and ``image_datasets``.

    Args:
        data: path of the pickled detections to evaluate.
        frames_per_dataset: number of consecutive detection frames belonging
            to each annotated dataset; dataset ``u`` starts at detection index
            ``u * frames_per_dataset``. Must agree with the chunk size used to
            build ``image_datasets``.

    Returns:
        Nested result list, one entry per evaluated detection file:
        ``[path, [confidence, [dataset_name, total_gt, correct, missed,
        false_positives, AP, Recall], ...], ...]``.

    Raises:
        FileNotFoundError: if a frame image cannot be read.
    """
    print("Running evaluation of ", data, "...")

    # SECURITY: pickle.load can execute arbitrary code - only open trusted files.
    with open(annotation_file, 'rb') as f:
        all_annotations = pickle.load(f)

    # Load the file named by the argument, so the log lines below describe the
    # data that is actually evaluated.
    snapshots = [data]
    all_detections = []

    for snapshot in snapshots:
        with open(snapshot, 'rb') as f:
            all_detections.append([snapshot, pickle.load(f)])

    print("ran inference on {} frames, using {}".format(len(all_detections[-1][1]), data))

    max_detection_distance_px = overlap_thresh  # 0.1 = 10% away from centre to be considered a valid detection
    thresh_list = [confidence_thresh]
    print("Computing AP scores for thresholds of {}".format(thresh_list))

    # Frames are resized to this shape; detections are normalised by it.
    network_shape = [800, 800]

    # matrix shape: dataset(samples) , model x iteration x threshold
    Results_mat = []

    for model in all_detections:
        print("\n", model[0])

        Results_mat.append([model[0]])

        for confidence in thresh_list:
            Results_mat[-1].append([confidence])

            print("\n running inference at {} confidence threshold".format(confidence))

            for u, unique_dataset in enumerate(all_annotations):
                print("dataset:", unique_dataset[0])

                total_gt_detections = 0  # number of total detections in the ground truth dataset
                total_missed_detections = 0  # number of missed detections which are present in the ground truth dataset
                total_false_positives = 0  # number of incorrect detections that do not match any ground truth tracks
                all_frame_detection_deviations = []  # list of mean deviations for correct detections

                # unique_dataset[0] is the dataset name, the remaining entries are per-frame annotations.
                frames = zip(model[1][u * frames_per_dataset:], unique_dataset[1:], image_datasets[u])
                for detection, annotation, img_path in frames:
                    img = cv2.imread(img_path)
                    if img is None:
                        # cv2.imread signals failure by returning None instead of raising.
                        raise FileNotFoundError("Could not read image: {}".format(img_path))
                    img = cv2.resize(img, (network_shape[0], network_shape[1]))

                    gt_detections, missed_detections, false_positives, mean_detection_distance, out_img = compare_frame(
                        annotation,
                        detection,
                        max_detection_distance_px,
                        network_shape,
                        confidence, img)

                    cv2.imshow("img", out_img)
                    cv2.waitKey(1)

                    total_gt_detections += gt_detections
                    total_missed_detections += missed_detections
                    total_false_positives += false_positives
                    all_frame_detection_deviations.append(mean_detection_distance)

                total_correct_detections = total_gt_detections - total_missed_detections

                # Guard against datasets without any processed frame.
                if all_frame_detection_deviations:
                    mean_px_error = np.mean(all_frame_detection_deviations) * 100
                else:
                    mean_px_error = 0.0

                # Guard against datasets without any ground-truth entry.
                if total_gt_detections > 0:
                    detection_accuracy = ((total_correct_detections - total_false_positives)
                                          / total_gt_detections) * 100
                else:
                    detection_accuracy = 0.0

                if total_correct_detections == 0:
                    # precision and recall are zero if no objects are correctly detected
                    AP = 0
                    Recall = 0
                else:
                    # NOTE: computed as TP / (TP + FP) at a single confidence threshold.
                    AP = total_correct_detections / (total_correct_detections + total_false_positives)
                    Recall = total_correct_detections / total_gt_detections

                print("Total ground truth detections:", total_gt_detections)
                print("Total correct detections:", total_correct_detections)
                print("Total missed detections:", total_missed_detections)
                print("Total false positives:", total_false_positives)
                print("Average Precision:", round(AP, 3))
                print("Recall:", round(Recall, 3))
                print("Detection accuracy (GT - FP - MD) / GT):", np.round(detection_accuracy, 1), "%")
                print("Mean relative deviation: {} %\n".format(np.round(mean_px_error, 3)))

                Results_mat[-1][-1].append([unique_dataset[0],
                                            total_gt_detections,
                                            total_correct_detections,
                                            total_missed_detections,
                                            total_false_positives,
                                            AP,
                                            Recall])

    return Results_mat

In [4]:
# Collect every ".JPG" file in obj_path (case-sensitive suffix match) in sorted order.
image_paths = sorted(
    os.path.join(Path(obj_path), file_name)
    for file_name in listdir(obj_path)
    if file_name.endswith(".JPG")
)
print("Found {} images.\n".format(len(image_paths)))

def chunks(xs, n):
    """Split ``xs`` into consecutive slices of at most ``n`` elements.

    A chunk size below 1 is treated as 1. The final slice may be shorter than
    ``n``; an empty ``xs`` yields an empty list.
    """
    step = n if n > 1 else 1
    pieces = []
    start = 0
    total = len(xs)
    while start < total:
        pieces.append(xs[start:start + step])
        start += step
    return pieces

# Group the sorted image paths into consecutive blocks of 1000 frames;
# process_detections() reads image_datasets[u] for the u-th annotated dataset.
image_datasets = chunks(image_paths,1000)
print(len(image_datasets))

try:
    process_detections(detection_file)
finally:
    # Close the OpenCV preview window even if the evaluation raises.
    cv2.destroyAllWindows()

Found 5000 images.

5
Running evaluation of  D:/BENCHMARK/OUTPUT/rc2/rc2_yolov4_array_HPC_new_20000.pkl ...
ran inference on 5000 frames, using D:/BENCHMARK/OUTPUT/rc2/rc2_yolov4_array_HPC_new_20000.pkl
Computing AP scores for thresholds of [0.6]

 D:/BENCHMARK/OUTPUT/rc2/rc2_yolov4_array_HPC_new_20000.pkl

 running inference at 0.6 confidence threshold
dataset: real_base_frame
Total ground truth detections: 67930
Total correct detections: 48209
Total missed detections: 19721
Total false positives: 3620
Average Precision: 0.93
Recall: 0.71
Detection accuracy (GT - FP - MD) / GT): 65.6 %
Mean relative deviation: 0.422 %

dataset: real_bright_frame
Total ground truth detections: 77348
Total correct detections: 16394
Total missed detections: 60954
Total false positives: 1423
Average Precision: 0.92
Recall: 0.212
Detection accuracy (GT - FP - MD) / GT): 19.4 %
Mean relative deviation: 0.64 %

dataset: real_close_frame
Total ground truth detections: 18392
Total correct detections: 14751
Tot