In [1]:
import json
import os

import argparse
import torch
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

from backbone import EfficientDetBackbone
from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess, boolean_string

In [2]:
# --- Model variant -----------------------------------------------------------
# Index of the EfficientDet variant; also used to pick the input resolution
# from `input_sizes`.
compound_coef = 0

# --- Post-processing ---------------------------------------------------------
# Forwarded to postprocess() as its final (NMS) threshold argument.
nms_threshold = 0.5

# --- Device / precision ------------------------------------------------------
# When use_cuda is set, the model and every input tensor are moved to CUDA
# device number `gpu`.
use_cuda, gpu = True, 0
# Not referenced by the cells visible in this notebook.
float16 = False

# --- Result caching ----------------------------------------------------------
# True: always re-run inference, even if a results JSON already exists.
override_prev_results = True

In [3]:
# Checkpoint file that the evaluation cell loads with torch.load().
weights_path = f'weights/efficientdet-d0_1_18000.pth' 
# Not referenced by the cells visible in this notebook.
num_gpus=1

# mean and std in RGB order
# (not passed to preprocess() in the visible cells)
mean= [0.485, 0.456, 0.406]
std= [0.229, 0.224, 0.225]

# coco anchors
# Kept as strings; the evaluation cell turns them into Python lists with eval()
# before handing them to EfficientDetBackbone.
anchors_scales= '[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]'
anchors_ratios= '[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]'

# Class names. Only len(obj_list) is used in the visible cells
# (num_classes=len(obj_list)+1 when the model is built).
# NOTE(review): two commas are missing below -- '3m''abus' (first line) and
# 'infiniti''infiniti_text'. Python concatenates adjacent string literals, so the
# list contains '3mabus' and 'infinitiinfiniti_text' and is two entries shorter
# than the names suggest. Adding the commas changes len(obj_list) and therefore
# num_classes, which has to match the loaded checkpoint -- fix both together.
obj_list = ['3m''abus','accenture','adidas','adidas1','adidas_text','airhawk','airness','aldi','aldi_text','alfaromeo','allett','allianz','allianz_text',
            'aluratek','aluratek_text','amazon','amcrest','amcrest_text','americanexpress','americanexpress_text','android','anz','anz_text',
            'apc','apecase','apple','aquapac_text','aral','armani','armitron','aspirin','asus','at_and_t','athalon','audi','audi_text',
            'axa','bacardi','bankofamerica','bankofamerica_text','barbie','barclays','base','basf','batman','bayer','bbc','bbva',
          'becks','bellataylor','bellodigital','bellodigital_text','bem','benrus','bershka','bfgoodrich','bik','bionade',
            'blackmores','blizzardentertainment','bmw','boeing','boeing_text','bosch','bosch_text','bottegaveneta','bridgestone','bridgestone_text','budweiser','budweiser_text','bulgari','burgerking',
            'burgerking_text','calvinklein','canon','carglass','carlsberg','carters','cartier','caterpillar','chanel','chanel_text','cheetos','chevrolet',
            'chevrolet_text','chevron','chickfila','chimay','chiquita','cisco','citi','citroen','citroen_text','coach','cocacola','coke','colgate','comedycentral','converse','corona','corona_text',
            'costa','costco','cpa_australia','cvs','cvspharmacy','danone','dexia','dhl','disney','doritos','drpepper','dunkindonuts','ebay','ec','erdinger','espn','esso','esso_text',
            'evernote','facebook','fedex','ferrari','firefox','firelli','fly_emirates','ford','fosters','fritolay','fritos',
            'gap','generalelectric','gildan','gillette','goodyear','google','gucci','guinness','hanes','head','head_text','heineken','heineken_text','heraldsun','hermes','hersheys',
            'hh','hisense','hm','homedepot','homedepot_text','honda','honda_text','hp','hsbc','hsbc_text','huawei','huawei_text','hyundai','hyundai_text','ibm','ikea','infiniti''infiniti_text',
            'intel','internetexplorer','jackinthebox','jacobscreek','jagermeister','jcrew','jello','johnnywalker','jurlique','kelloggs',
            'kfc','kia','kitkat','kodak','kraft','lacoste','lacoste_text','lamborghini','lays','lego','levis','lexus','lexus_text',
            'lg','londonunderground','loreal','lotto','luxottica','lv','marlboro','marlboro_fig','marlboro_text','maserati','mastercard','maxwellhouse',
            'maxxis','mccafe','mcdonalds','mcdonalds_text','medibank','mercedesbenz','mercedesbenz_text','michelin','microsoft','milka',
            'millerhighlife','mini','miraclewhip','mitsubishi','mk','mobil','motorola','mtv','nasa','nb','nbc','nescafe','netflix','nike',
            'nike_text','nintendo','nissan','nissan_text','nivea','northface','nvidia','obey','olympics','opel','optus','optus_yes','oracle','pampers','panasonic'
            ,'paulaner','pepsi','pepsi_text','pepsi_text1','philadelphia','philips','pizzahut','pizzahut_hut','planters','playstation',
            'poloralphlauren','porsche','porsche_text','prada','puma','puma_text','quick','rbc','recycling','redbull','redbull_text','reebok','reebok1','reebok_text',
            'reeses','renault','republican','rittersport','rolex','rolex_text','ruffles','samsung','santander','santander_text','sap','schwinn',
            'scion_text','sega','select','shell','shell_text','shell_text1','siemens','singha','skechers','sony','soundcloud','soundrop',
            'spar','spar_text','spiderman','sprite','standard_liege','starbucks','stellaartois','subaru','subway','sunchips',
            'superman','supreme','suzuki','t-mobile','tacobell','target','target_text','teslamotors','texaco','thomsonreuters',
            'tigerwash','timberland','tissot','tnt','tommyhilfiger','tostitos','total','toyota','toyota_text','tsingtao','twitter','umbro',
            'underarmour','unicef','uniqlo','uniqlo1','unitednations','ups','us_president','vaio','velveeta','venus','verizon',
            'verizon_text','visa','vodafone','volkswagen','volkswagen_text','volvo','walmart','walmart_text','warnerbros','wellsfargo',
            'wellsfargo_text','wii','williamhill','windows','wordpress','xbox','yahoo','yamaha','yonex','yonex_text','youtube','zara']

# Indexed by compound_coef in evaluate_coco() to obtain the max_size passed to preprocess().
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]


def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    """Run ``model`` on every image in ``image_ids`` and write the detections to JSON.

    The detections are stored as a list of dicts with the keys ``image_id``,
    ``category_id``, ``score`` and ``bbox`` (``[x, y, w, h]``) in
    ``'{set_name}_bbox_results.json'`` in the current working directory. An
    existing file of that name is overwritten.

    Besides its arguments, the function reads the notebook-level settings
    ``input_sizes``, ``compound_coef``, ``use_cuda``, ``gpu`` and
    ``nms_threshold``.

    Args:
        img_path: Directory that contains the image files.
        set_name: Prefix of the output file name.
        image_ids: Iterable of image ids understood by ``coco``.
        coco: Object exposing ``loadImgs(image_id)``; the first returned entry
            must have a ``'file_name'`` key.
        model: Callable that returns ``(features, regression, classification,
            anchors)`` for an input batch.
        threshold: Score threshold forwarded to ``postprocess``.

    Raises:
        Exception: If not a single detection was produced for any image.
    """
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Pure inference: disable autograd here so the function does not rely on
    # the caller having frozen the model's parameters.
    with torch.no_grad():
        for image_id in tqdm(image_ids):
            image_info = coco.loadImgs(image_id)[0]
            # os.path.join tolerates img_path with or without a trailing separator.
            image_path = os.path.join(img_path, image_info['file_name'])

            ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=input_sizes[compound_coef])
            x = torch.from_numpy(framed_imgs[0])

            if use_cuda:
                x = x.cuda(gpu)
            x = x.float()

            # Add a batch axis, then reorder (0, 3, 1, 2) as the model input expects.
            x = x.unsqueeze(0).permute(0, 3, 1, 2)
            features, regression, classification, anchors = model(x)

            preds = postprocess(x,
                                anchors, regression, classification,
                                regressBoxes, clipBoxes,
                                threshold, nms_threshold)

            if not preds:
                continue

            # Map boxes back to the original image; one image per batch, so take entry 0.
            preds = invert_affine(framed_metas, preds)[0]

            scores = preds['scores']
            class_ids = preds['class_ids']
            rois = preds['rois']

            if rois.shape[0] == 0:
                continue

            # x1,y1,x2,y2 -> x1,y1,w,h (in place)
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            for roi_id in range(rois.shape[0]):
                results.append({
                    'image_id': image_id,
                    # Class index shifted by one; presumably the annotation file
                    # uses 1-based category ids -- TODO confirm.
                    'category_id': int(class_ids[roi_id]) + 1,
                    'score': float(scores[roi_id]),
                    'bbox': rois[roi_id, :].tolist(),
                })

    if not results:
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output; mode 'w' truncates an existing file, and the context manager
    # guarantees the handle is flushed and closed before the file is read back.
    filepath = f'{set_name}_bbox_results.json'
    with open(filepath, 'w') as results_file:
        json.dump(results, results_file, indent=4)


def _eval(coco_gt, image_ids, pred_json_path):
    """Score a detections file against ``coco_gt`` and print the bbox summary.

    Args:
        coco_gt: Ground-truth object providing ``loadRes``.
        image_ids: Image ids the evaluation is restricted to.
        pred_json_path: Path of the detections JSON to evaluate.
    """
    # Detections are loaded through the ground-truth object before anything is printed.
    detections = coco_gt.loadRes(pred_json_path)

    print('BBox')
    evaluator = COCOeval(coco_gt, detections, 'bbox')
    evaluator.params.imgIds = image_ids

    # The three stages have to run in exactly this order.
    for stage in ('evaluate', 'accumulate', 'summarize'):
        getattr(evaluator, stage)()



    

In [4]:
SET_NAME = 'JPEGImages'
# Paths are built with os.path.join instead of hard-coded backslashes: the
# original literal contained the invalid escape sequence '\i' and only resolved
# on Windows. On Windows the resulting strings are unchanged.
VAL_GT = os.path.join('Annotations', 'instances_JPEGImages.json')
# Joining with '' keeps the trailing separator the original value had.
VAL_IMGS = os.path.join('JPEGImages', '')
# Detections file written by evaluate_coco() and read back by _eval().
RESULTS_JSON = f'{SET_NAME}_bbox_results.json'

# Upper bound on the number of images evaluated.
MAX_IMAGES = 1000
coco_gt = COCO(VAL_GT)
image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

# Re-run inference when forced to, or when no cached detections file exists.
if override_prev_results or not os.path.exists(RESULTS_JSON):
    # NOTE(review): eval() is applied to the notebook's own constant strings
    # (they contain '**' expressions). Never feed it externally supplied text.
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list)+1,
                                 ratios=eval(anchors_ratios), scales=eval(anchors_scales))
    # Load on CPU first; the model is moved to the GPU below if requested.
    model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model.cuda(gpu)

    evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt, model)

_eval(coco_gt, image_ids, RESULTS_JSON)


loading annotations into memory...
Done (t=0.36s)
creating index...
index created!


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:36<00:00, 10.39it/s]


Loading and preparing results...
DONE (t=0.25s)
creating index...
index created!
BBox
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=6.07s).
Accumulating evaluation results...
DONE (t=1.56s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.015
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.030
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.014
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.019
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.144
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.190
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.191
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDe

* The metrics used to evaluate the model are Intersection over Union (IoU), Average Precision (AP) and Average Recall (AR).
* For AP, we collect all the predictions made for a given class (here, one logo class) across all the images and rank them in descending order of predicted confidence.
* IoU is a metric that measures the overlap between a ground-truth annotation and a predicted bounding box: the area of their intersection divided by the area of their union. In object detection, the model predicts multiple bounding boxes for each object, and, based on the confidence score of each bounding box, it removes redundant overlapping boxes whose IoU with a higher-scoring box exceeds a threshold (non-maximum suppression).
* AP at IoU=0.50 means a predicted box counts as correct only if its IoU with a ground-truth box is at least 0.5; this is the standard threshold reported for most models.
* AP at IoU=0.75 uses the stricter threshold of 0.75: predictions whose IoU with the ground-truth box is below 0.75 are counted as false positives, so this score rewards more accurately localized bounding boxes.
* AP (area=small) is the AP score computed only over the small objects in the data.
* AP (area=large) is the AP score computed only over the large objects in the data.
