# Calculation of Average Precision

In [1]:
'''load packages'''
from __future__ import print_function
from __future__ import division
#
import os
import numpy as np
import time
import copy
import math
#
import cv2
import matplotlib.pyplot as plt
#
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, models, transforms
#
from collections import defaultdict
from tqdm import tqdm
#
import yaml

In [2]:
# Put the parent directory first on the module search path so that the
# project packages imported further down (`src`, `labels`) can be found.
import sys
sys.path.insert(0, '..')

In [3]:
'''somewhat dangerous but turn off everything for the time being'''
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Project-local classes and helpers (found through the '..' entry on sys.path).
from src.networks.yolo import Yolo
from src.datasets.dataset import ThermalDataset
from src.training.yoloLoss import yoloLoss
#
from src.datasets.utils import image_weights_from_label_file
#
from src.postprocessing.predictions import Prediction
# Import the evaluation helpers by name rather than with `import *`, so it is
# clear where they come from and nothing else is pulled into the namespace.
from src.postprocessing.evaluation import get_batch_statistics, ap_per_class

In [5]:
# Invert the category -> class-index table so that a class index can be
# turned back into its category name when reporting results.
from labels.label_utils import category2class
class2category = dict((index, name) for name, index in category2class.items())
print(class2category)

{0: 'person', 1: 'bike', 2: 'car', 3: 'motor', 4: 'bus', 5: 'truck', 6: 'light', 7: 'hydrant', 8: 'sign', 9: 'other_vehicle'}


In [6]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Load the configurations

In [7]:
# Read the experiment configuration.
# `yaml.safe_load` is used because calling `yaml.load` without an explicit
# Loader is deprecated since PyYAML 5.1 and raises a TypeError from PyYAML 6
# onwards; the safe loader also refuses to construct arbitrary Python objects.
with open('../configurations/config.yml', 'r') as f:
    configs = yaml.safe_load(f)

# Notebook-specific override of the batch size.
configs['batch size'] = 32

# Prefix the configured paths with '../' because this notebook lives one
# directory below the location those paths are written for
# (presumably the repository root -- confirm against config.yml).
for path_key in ('test path', 'annot path', 'save path'):
    configs[path_key] = '../' + configs[path_key]

### Load the test data

In [8]:
# Test-time preprocessing: the only transform applied is conversion to a tensor.
Transform = transforms.Compose([transforms.ToTensor()])

# Locations of the test images and of the annotation list file.
test_image_dir = os.path.join(configs['test path'], 'data')
test_list_file = os.path.join(configs['annot path'], configs['test fname'])

test_set = ThermalDataset(
    root = test_image_dir,
    list_file = test_list_file,
    image_size = configs['image size'],
    anchors = configs['anchors'],
    num_classes = configs['num class'],
    train = False,
    transform = Transform,
)

# Keep the dataset order (no shuffling) and keep the last, possibly smaller,
# batch so that every test sample is evaluated.
test_loader = DataLoader(
    test_set,
    batch_size = configs['batch size'],
    shuffle = False,
    drop_last = False,
    num_workers = configs['num workers'],
    collate_fn = test_set.collate_fn,
)

data init


### Set up the model

In [9]:
# Build the network and restore the trained weights from disk.
pth_name = 'YOLO_FLIR_epoch_79.pth'
parameter_path = os.path.join(configs['save path'], pth_name)

net = Yolo(anchors = configs['anchors'],
           num_classes = configs['num class'],
           cls_probs = torch.Tensor(configs['category frequency']),
           inp_dim = configs['image size'])

# `map_location` lets a checkpoint that was saved on a GPU be restored on a
# CPU-only machine (and vice versa) instead of failing inside torch.load.
model_dict = torch.load(parameter_path, map_location = device)

# The file holds either a bare state dict or a checkpoint dictionary that
# wraps the state dict under 'model_weights'. Pick the right one explicitly
# rather than relying on a bare `except:`, which would also hide genuine
# loading errors (e.g. mismatched layer shapes).
if isinstance(model_dict, dict) and 'model_weights' in model_dict:
    state_dict = model_dict['model_weights']
else:
    state_dict = model_dict
net.load_state_dict(state_dict)

# Move the model to the evaluation device and switch to inference mode.
# `net.eval()` is the last expression, so the cell displays the architecture.
net.to(device)
net.eval()

Yolo(
  (backbone): Backbone(
    (m): Sequential(
      (0): Focus(
        (conv): Conv(
          (conv): Conv2d(12, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU()
        )
      )
      (1): Conv(
        (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU()
      )
      (2): C3(
        (cv1): Conv(
          (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU()
        )
        (cv2): Conv(
          (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats

### Prediction and AP Calculation

In [10]:
# Experimental override of the `obj_thres` value read from the config file.
configs['obj_thres'] = 0.99

# These settings are passed to Prediction under the same names as their config keys.
postprocess_keys = ('obj_thres', 'conf_thres', 'loc_thres', 'sigma', 'nms_thres', 'top_k')

prediction = Prediction(
    anchors = configs['anchors'],
    beta = configs['beta'],
    inp_dim = configs['image size'],
    num_classes = configs['num class'],
    CUDA = torch.cuda.is_available(),
    **{key: configs[key] for key in postprocess_keys}
)

In [11]:
# Run the detector over the whole test set, collecting the ground-truth class
# labels and the per-sample detection statistics needed for the AP computation.
labels = []
sample_metrics = []

progress = tqdm(test_loader, desc = "Detecting objects")
for batch_i, (imgs, gt_boxes, _, _, _) in enumerate(progress):

    imgs = imgs.to(device)
    gt_boxes = gt_boxes.to(device)

    # The last column of the ground-truth tensor is taken as the class label.
    labels.extend(gt_boxes[:, -1].tolist())

    # Forward pass only; no gradients are needed for evaluation.
    with torch.no_grad():
        pred = net(imgs)

    # Post-process the raw network output with the Prediction object.
    outputs = prediction(pred)

    batch_stats = get_batch_statistics(outputs, gt_boxes, iou_threshold = configs['iou_threshold'])
    sample_metrics.extend(batch_stats)

# Regroup the per-sample tuples into three flat arrays, one per statistic.
true_positives, pred_scores, pred_labels = (
    np.concatenate(column, 0) for column in zip(*sample_metrics)
)

precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels)

Detecting objects: 100%|█████████████████████████████████████████████████████████████| 110/110 [01:11<00:00,  1.53it/s]
Computing AP: 100%|█████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 118.68it/s]


## Let's print the per class AP & mAP

In [12]:
# Report the AP of every evaluated class followed by the mean over those classes.
# Each AP value is paired with its class through `ap_class` rather than through
# its position in `AP`: only 9 of the 10 categories were evaluated, so position
# and class index agree only if the missing class happens to be the last one.
# NOTE(review): assumes ap_class[k] is the class index that AP[k] belongs to,
# as returned by ap_per_class -- confirm against src.postprocessing.evaluation.
print("average precisions per class:")
print("----------------------------------------")
for class_index, ap in zip(ap_class, AP):
    print(f"{class2category[int(class_index)]}: {ap}")
print("----------------------------------------")
print(f"mean average precision: {AP.mean()}")

average precisions per class:
----------------------------------------
person: 0.8126683650019046
bike: 0.0
car: 0.7928876588546154
motor: 0.1386380500523739
bus: 0.1251476167960378
truck: 0.3029401879259137
light: 0.02
hydrant: 0.1328846678398106
sign: 0.005772005772005772
----------------------------------------
mean average precision: 0.2589931724714069


## Comparison
- The FLIR baseline (YOLOX-m) has an AP of 0.7533 and 0.7723 for the person and car classes, respectively, compared with 0.8127 and 0.7929 obtained here. So, in terms of accuracy on these two classes, this is an improvement.
- According to the YOLOX technical report (https://arxiv.org/pdf/2107.08430.pdf), YOLOX-m has 25.3 M parameters and 73.8 GFLOPs at 640 x 640 resolution on MS COCO. In contrast, YOLOv5-m has 21.4 M parameters and 51.4 GFLOPs. Hence, the trained model may also be more efficient (but this obviously depends on the implementation too).