In [1]:
import os
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval

## Load validation annotations

In [2]:
# Local import so this cell stays re-runnable: `coco` is rebound below from the
# pycocotools module to the loaded dataset object, after which `coco.COCO` no longer exists.
from pycocotools.coco import COCO

split = 'val'
# Raw string: the backslashes of the Windows path must be taken literally
# (sequences such as `\T` or `\d` are invalid escapes in a normal string literal).
# NOTE(review): this is a machine-specific absolute path; consider making it configurable.
coco_root = r'F:\TUM Learning Material\Forschung\CenterNet\CenterNet-master\data\coco'
annot_path = os.path.join(coco_root, 'annotations', 'instances_{}2017.json'.format(split))
# Initialize the ground-truth COCO object from the annotation file.
# Later cells refer to it by the name `coco`.
coco = COCO(annot_path)

loading annotations into memory...
Done (t=1.06s)
creating index...
index created!


## Load detections

In [None]:
#coco_dets = coco.loadRes('{}/results.json'.format('F:\TUM Learning Material\Forschung\CenterNet\CenterNet-master\exp\ctdet\hourglass'))

In [4]:
# Load the Mask R-CNN detection results into a COCO results object.
# Raw string so the backslashes of the Windows path are not parsed as escape sequences.
maskrcnn_dets = coco.loadRes(r'F:\TUM Learning Material\Forschung\maskrcnn_detection_results.json')

Loading and preparing results...
DONE (t=0.44s)
creating index...
index created!


In [10]:
# Number of category ids known to the detection results object.
len(maskrcnn_dets.getCatIds())

80

In [None]:
class Params:
    '''
    Settings container for the COCO-style evaluation steps in this notebook.
    '''

    def __init__(self, iouType='segm'):
        # Fill in the detection defaults, then remember the requested IoU type.
        self.setDetParams()
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None

    def setDetParams(self):
        '''
        Reset all detection-evaluation settings to their default values.
        '''
        # The thresholds are generated with linspace (explicit point count) instead of
        # arange, whose samples come out slightly larger than the intended values.
        n_iou_thrs = int(np.round((0.95 - .5) / .05)) + 1
        n_rec_thrs = int(np.round((1.00 - .0) / .01)) + 1
        # Area bounds (in squared pixels) separating the size buckets.
        small_max, medium_max, upper = 32 ** 2, 96 ** 2, 1e5 ** 2

        self.imgIds = []
        self.catIds = []
        self.iouThrs = np.linspace(.5, 0.95, n_iou_thrs, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, n_rec_thrs, endpoint=True)
        self.maxDets = [1, 10, 100]
        # One [min, max] pair per label in areaRngLbl, in the same order.
        self.areaRng = [[0, upper], [0, small_max], [small_max, medium_max], [medium_max, upper]]
        self.areaRngLbl = ['all', 'small', 'medium', 'large']
        self.useCats = 1

In [None]:
import numpy as np
import datetime
import time
from collections import defaultdict
from pycocotools import mask as maskUtils
import copy

In [None]:
cocoGt   = coco                # ground truth COCO API
# Detections COCO API. `maskrcnn_dets` is the only detection result loaded in this
# notebook; the previously referenced `yolo_dets` is never defined and fails on a fresh kernel.
cocoDt   = maskrcnn_dets
evalImgs = defaultdict(list)   # per-image per-category evaluation results [KxAxI] elements
eval     = {}                  # accumulated evaluation results (NOTE: shadows the builtin `eval`)
_gts = defaultdict(list)       # gt for evaluation
_dts = defaultdict(list)       # dt for evaluation
params = Params(iouType='bbox') # parameters
_paramsEval = {}               # parameters for evaluation
stats = []                     # result summarization
ious = {}                      # ious between all gts and dts
# By default evaluate every image and every category known to the ground truth.
if cocoGt is not None:
    params.imgIds = sorted(cocoGt.getImgIds())
    params.catIds = sorted(cocoGt.getCatIds())

In [None]:
print(len(params.imgIds))
print(len(params.catIds))

In [None]:
len(cocoGt.getAnnIds(imgIds=params.imgIds, catIds=params.catIds)) # number of ground-truth annotation ids for the selected images and categories

In [None]:
len(cocoDt.getAnnIds(imgIds=params.imgIds, catIds=params.catIds)) # number of detection annotation ids for the selected images and categories; NOTE(review): the original note assumed about 100 detections per image and planned to analyse the maximum-detections setting as a hyperparameter - confirm the image count

In [None]:
# Load the full annotation dicts (ground truth and detections) for the selected images and categories.
gts=cocoGt.loadAnns(cocoGt.getAnnIds(imgIds=params.imgIds, catIds=params.catIds))
dts=cocoDt.loadAnns(cocoDt.getAnnIds(imgIds=params.imgIds, catIds=params.catIds))

In [None]:
dts[0] # example detection annotation; the detections are later regrouped into _dts for evaluation

In [None]:
# Set the ignore flag from the crowd flag: a ground truth is ignored when it is marked
# `iscrowd`; annotations without an `iscrowd` key get False.
# The former "default from an existing 'ignore' key" assignment was a dead store: it was
# unconditionally overwritten on the very next line, so it is dropped here.
for gt in gts:
    gt['ignore'] = gt.get('iscrowd', False)

## Group annotations by (image_id, category_id) for evaluation (_gts, _dts)

In [None]:
# Re-create the lookup tables on every run: the loops below append, so re-executing
# this cell against the tables from an earlier cell would store every annotation twice.
_gts = defaultdict(list)       # gt for evaluation, keyed by (image_id, category_id)
_dts = defaultdict(list)       # dt for evaluation, keyed by (image_id, category_id)
for gt in gts:
    _gts[gt['image_id'], gt['category_id']].append(gt)
for dt in dts:
    _dts[dt['image_id'], dt['category_id']].append(dt)

#evalImgs = defaultdict(list)   # per-image per-category evaluation results
#eval     = {}                  # accumulated evaluation results

In [None]:
#_gts.keys()

In [None]:
# De-duplicate the ids to evaluate (np.unique returns the sorted unique values).
imgIds = list(np.unique(params.imgIds))
# NOTE(review): catIds is only assigned when params.useCats is truthy; with useCats == 0
# the later cells that iterate over catIds would fail unless it is defined elsewhere.
if params.useCats:
    catIds = list(np.unique(params.catIds))
# Detection budgets in ascending order, so the last entry is the largest.
maxDets = sorted(params.maxDets)

In [None]:
print(len(imgIds))
print(len(catIds))
maxDets

## compute iou between gt and detection (corresponding to imgId and catId)

In [None]:
def computeIoU(params, imgId, catId, _gts, _dts):
    '''
    Compute the bounding-box IoUs between the detections and the ground truths
    of one (image, category) pair.

    :param params: object whose `maxDets[-1]` caps the number of detections used
    :param imgId: image id
    :param catId: category id
    :param _gts: mapping (imgId, catId) -> list of ground-truth annotation dicts
    :param _dts: mapping (imgId, catId) -> list of detection annotation dicts
    :return: [] when the pair has neither ground truths nor detections, otherwise
             the result of maskUtils.iou(detection boxes, ground-truth boxes, crowd flags)
    '''
    key = (imgId, catId)
    gt_anns = _gts[key]
    dt_anns = _dts[key]

    # Nothing to compare for this pair.
    if not gt_anns and not dt_anns:
        return []

    # Highest score first; mergesort keeps the order of equal scores stable.
    order = np.argsort([-ann['score'] for ann in dt_anns], kind='mergesort')
    # Keep at most the largest configured number of detections.
    dt_anns = [dt_anns[idx] for idx in order][:params.maxDets[-1]]

    # Extract the bounding boxes and the per-ground-truth crowd flags.
    gt_boxes = [ann['bbox'] for ann in gt_anns]
    dt_boxes = [ann['bbox'] for ann in dt_anns]
    crowd_flags = [int(ann['iscrowd']) for ann in gt_anns]

    # IoU between each detection and each ground-truth region.
    return maskUtils.iou(dt_boxes, gt_boxes, crowd_flags)

In [None]:
# One IoU result per (image, category) pair, computed up front.
ious = {
    (img_id, cat_id): computeIoU(params, img_id, cat_id, _gts, _dts)
    for img_id in imgIds
    for cat_id in catIds
}

In [None]:
print(len(_gts[(139, 64)])) #2 ground_truth bounding boxes 
print(len(_dts[(139, 64)])) #10 detections 

In [None]:
ious[(139,64)].shape

In [None]:
params.areaRng # [min, max] area bounds for the buckets: all, small, medium, large

## find matching between gt and detection

In [None]:
def evaluateImg(params, ious, imgId, catId, aRng, maxDet):
        '''
        Perform the evaluation for a single category and image: match detections
        to ground truths at every IoU threshold.

        Reads the module-level tables `_gts` and `_dts` (they are not parameters).
        Side effect: writes an `_ignore` key into every ground-truth dict of this pair.

        :param params: object providing `iouThrs`, the IoU thresholds to evaluate
        :param ious: mapping (imgId, catId) -> IoU matrix indexed [detection, ground truth]
        :param imgId: image id
        :param catId: category id
        :param aRng: [min_area, max_area]; ground truths outside this range are ignored
        :param maxDet: maximum number of top-scoring detections to consider
        :return: dict (single image results), or None when the pair has neither
                 ground truths nor detections
        '''
        p = params
        gt = _gts[imgId,catId]
        dt = _dts[imgId,catId]

        if len(gt) == 0 and len(dt) ==0:
            return None
        
        # A ground truth is ignored in two cases:
        # case 1: its 'ignore' flag is already set
        # case 2: its area lies outside the evaluated area range
        for g in gt:
            if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
                g['_ignore'] = 1
            else:
                g['_ignore'] = 0

        # sort dt highest score first, sort gt ignore last
        # (mergesort is stable, so ties keep their original order)
        gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
        gt = [gt[i] for i in gtind]
        dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in dtind[0:maxDet]]
        iscrowd = [int(o['iscrowd']) for o in gt]
        # load computed ious and reorder the ground-truth columns to match the sorted gt list
        # NOTE(review): the detection rows are not reordered here; this assumes the matrix rows
        # are already in descending-score order (as computeIoU sorts them) - confirm.
        ious = ious[imgId, catId][:, gtind] if len(ious[imgId, catId]) > 0 else ious[imgId, catId]

        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        gtm  = np.zeros((T,G))   # per threshold: id of the detection matched to each gt (0 = none)
        dtm  = np.zeros((T,D))   # per threshold: id of the gt matched to each detection (0 = none)
        gtIg = np.array([g['_ignore'] for g in gt])
        dtIg = np.zeros((T,D))   # per threshold: whether each detection is ignored
        if not len(ious)==0:
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    # the best IoU starts at the threshold, capped just below 1
                    iou = min([t,1-1e-10])
                    m   = -1
                    for gind, g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind,gind]>0 and not iscrowd[gind]:
                            continue
                        # if dt matched to regular (not ignores) gt, and now on ignore gt, stop
                        # (gts are sorted with ignored ones last, so only ignored gts remain)
                        if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
                            break
                        # continue to next gt unless better match made
                        if ious[dind,gind] < iou:
                            continue
                        # if match successful and best so far, store appropriately
                        iou=ious[dind,gind]
                        m=gind
                    # if match made store id of match for both dt and gt
                    if m ==-1:
                        continue
                    # a detection matched to an ignored gt is itself ignored
                    dtIg[tind,dind] = gtIg[m]
                    dtm[tind,dind]  = gt[m]['id']
                    gtm[tind,m]     = d['id']
        # set unmatched detections outside of area range to ignore
        # NOTE(review): "unmatched" is tested as dtm==0, so a match to a gt whose id is 0
        # would also count as unmatched here.
        a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
        # store results for given image and category
        return {
                'image_id':     imgId,
                'category_id':  catId,
                'aRng':         aRng,
                'maxDet':       maxDet,
                'dtIds':        [d['id'] for d in dt],
                'gtIds':        [g['id'] for g in gt],
                'dtMatches':    dtm,
                'gtMatches':    gtm,
                'dtScores':     [d['score'] for d in dt],
                'gtIgnore':     gtIg,
                'dtIgnore':     dtIg,
            }

In [None]:
# Evaluate with the last (largest configured) detection budget only.
maxDet = params.maxDets[-1]
# Flat result list laid out category-major, then area range, then image;
# accumulate() relies on this order when it indexes into the list.
evalImgs = [
    evaluateImg(params, ious, img_id, cat_id, area_rng, maxDet)
    for cat_id in catIds
    for area_rng in params.areaRng
    for img_id in params.imgIds
]
# Snapshot of the parameters that produced evalImgs.
_paramsEval = copy.deepcopy(params)

In [None]:
evalImgs[0]

## accumulate results

In [None]:
def accumulate(params, _paramsEval, evalImgs):
    '''
    Accumulate the per-image evaluation results into precision / recall / score tables.

    Side effect: when `params.useCats` is not 1, `params.catIds` is replaced by [-1].

    :param params: parameters selecting what to accumulate (iouThrs, recThrs, catIds,
                   areaRng, maxDets, imgIds, useCats)
    :param _paramsEval: parameters that were in effect when `evalImgs` was produced;
                        they define the layout of `evalImgs`
    :param evalImgs: flat list of per-image results (dict or None), laid out
                     category-major, then area range, then image
    :return: dict with
             'params'    - the `params` object,
             'counts'    - [T, R, K, A, M],
             'precision' - array [T, R, K, A, M], -1 where no value was computed,
             'recall'    - array [T, K, A, M], -1 where no value was computed,
             'scores'    - array [T, R, K, A, M], detection score at each recall threshold
    '''
    print('Accumulating evaluation results...')
    # allows input customized parameters
    p = params
    p.catIds = p.catIds if p.useCats == 1 else [-1]
    T = len(p.iouThrs)
    R = len(p.recThrs)
    K = len(p.catIds) if p.useCats else 1
    A = len(p.areaRng)
    M = len(p.maxDets)
    precision = -np.ones((T, R, K, A, M))  # -1 for the precision of absent categories
    recall = -np.ones((T, K, A, M))
    scores = -np.ones((T, R, K, A, M))

    # create dictionary for future indexing
    _pe = _paramsEval
    catIds = _pe.catIds if _pe.useCats else [-1]
    setK = set(catIds)
    setA = set(map(tuple, _pe.areaRng))
    setM = set(_pe.maxDets)
    setI = set(_pe.imgIds)
    # get inds to evaluate (only entries that were part of the evaluated parameters)
    k_list = [n for n, k in enumerate(p.catIds) if k in setK]                 # category indices
    m_list = [m for n, m in enumerate(p.maxDets) if m in setM]                # max-detection values
    a_list = [n for n, a in enumerate(map(tuple, p.areaRng)) if a in setA]    # area-range indices
    i_list = [n for n, i in enumerate(p.imgIds) if i in setI]                 # image indices
    I0 = len(_pe.imgIds)
    A0 = len(_pe.areaRng)
    # retrieve E at each category, area range, and max number of detections
    for k, k0 in enumerate(k_list):              # categories
        Nk = k0 * A0 * I0
        for a, a0 in enumerate(a_list):          # area ranges
            Na = a0 * I0
            for m, maxDet in enumerate(m_list):  # detection budgets
                E = [evalImgs[Nk + Na + i] for i in i_list]
                E = [e for e in E if e is not None]
                if len(E) == 0:
                    continue

                # pool the detections of all images for this category / area range / budget
                dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])

                # different sorting method generates slightly different results.
                # mergesort is used to be consistent as Matlab implementation.
                inds = np.argsort(-dtScores, kind='mergesort')
                dtScoresSorted = dtScores[inds]

                # detection matching matrix, detection ignore matrix, ground-truth ignore flags
                dtm = np.concatenate([e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds]
                dtIg = np.concatenate([e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds]
                gtIg = np.concatenate([e['gtIgnore'] for e in E])
                # number of non-ignored ground truths (denominator of the recall)
                npig = np.count_nonzero(gtIg == 0)

                if npig == 0:
                    continue

                # true positive: matched and not ignored;
                # false positive: not matched to any ground truth and not ignored
                tps = np.logical_and(dtm, np.logical_not(dtIg))
                fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))

                # the builtin float replaces np.float, which was removed in NumPy 1.24
                tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
                # each row corresponds to one IoU threshold
                for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                    nd = len(tp)
                    rc = tp / npig
                    pr = tp / (fp + tp + np.spacing(1))  # true positives / all detections so far
                    q = np.zeros((R,))
                    ss = np.zeros((R,))

                    if nd:
                        recall[t, k, a, m] = rc[-1]
                    else:
                        recall[t, k, a, m] = 0

                    # numpy is slow without cython optimization for accessing elements
                    # use python array gets significant speed improvement
                    pr = pr.tolist()
                    q = q.tolist()

                    # make the precision monotonically non-increasing (precision envelope)
                    for i in range(nd - 1, 0, -1):
                        if pr[i] > pr[i - 1]:
                            pr[i - 1] = pr[i]

                    # first detection index reaching each recall threshold
                    rec_inds = np.searchsorted(rc, p.recThrs, side='left')
                    for ri, pi in enumerate(rec_inds):
                        if pi >= nd:
                            # this recall level is never reached: stop and leave the
                            # remaining entries at 0 (the old bare `except: pass` did
                            # the same by swallowing the IndexError)
                            break
                        q[ri] = pr[pi]
                        ss[ri] = dtScoresSorted[pi]
                    precision[t, :, k, a, m] = np.array(q)
                    scores[t, :, k, a, m] = np.array(ss)
    return {
        'params': p,
        'counts': [T, R, K, A, M],
        'precision': precision,
        'recall':   recall,
        'scores': scores,
    }

In [None]:
params.recThrs #recall thresholds

In [None]:
eval = accumulate(params, _paramsEval, evalImgs)

In [None]:
eval['precision'].shape

In [None]:
eval['recall'].shape

In [None]:
def _summarize( eval, ap=1, iouThr=None, areaRng='all', maxDets=100 ):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap==1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
        
        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            s = s[:,:,:,aind,mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            s = s[:,:,aind,mind]
        if len(s[s>-1])==0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s>-1])
        print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
        return mean_s

In [None]:
# Positions of the 'all' area range and of the detection budget 10 inside the parameter lists;
# they are used below to slice the accumulated precision / recall tables.
aind = [i for i, aRng in enumerate(params.areaRngLbl) if aRng == 'all'] # area-range index
mind = [i for i, mDet in enumerate(params.maxDets) if mDet == 10] # max-detections index

In [None]:
eval

In [None]:
precision_matrix = eval['precision']   # dimension: [TxRxKxAxM]
# Take the sizes from the matrix itself instead of hard-coding 10 IoU thresholds,
# 101 recall thresholds and 80 categories.
num_iou, num_rec, num_cat = precision_matrix.shape[:3]
s = precision_matrix[:,:,:,aind,mind]
# Entries equal to -1 carry no value; the reshape only succeeds when the selected
# slice contains none of them.
precision_result = s[s>-1].reshape(num_iou, num_rec, num_cat)
mean_s = np.mean(precision_result, axis=0)    # average over the IoU thresholds
category_result = np.mean(mean_s, axis=0)     # average over the recall thresholds -> one value per category

In [None]:
precision_sort_idx = np.argsort(-category_result, kind='mergesort')

In [None]:
precision_sort_idx

In [None]:
# Human-readable category names. The cells below index this list with positions along the
# category axis of the result tables.
# NOTE(review): this presumably follows the order of the sorted COCO category ids - verify
# against cocoGt.loadCats(params.catIds) rather than relying on the hard-coded order.
class_name = ['person', 'bicycle', 'car', 'motorcycle', 'airplane','bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
      'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
      'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
      'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
      'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
      'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
      'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
      'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
      'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
      'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
      'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
      'scissors', 'teddy bear', 'hair drier', 'toothbrush']

In [None]:
# Category names ordered from highest to lowest mean precision.
# (Iterates the sorted indices directly; the old `i = 0` initializer was a dead store.)
for cat_idx in precision_sort_idx:
    print(class_name[cat_idx])

In [None]:
# Name of the category with the highest mean precision. The sort indices are 0-based
# positions into class_name (the ranking loop uses them unshifted), so no -1 offset is applied.
class_name[precision_sort_idx[0]]

In [None]:
np.mean(category_result)

In [None]:
recall_matrix = eval['recall']   # dimension: [TxKxAxM]
# Take the sizes from the matrix itself instead of hard-coding 10 IoU thresholds and 80 categories.
num_iou, num_cat = recall_matrix.shape[:2]
recall = recall_matrix[:,:,aind,mind]
# Entries equal to -1 carry no value; the reshape only succeeds when the selected
# slice contains none of them.
recall_result = recall[recall>-1].reshape(num_iou, num_cat)
mean_recall = np.mean(recall_result, axis=0)   # average over the IoU thresholds -> one value per category

In [None]:
mean_recall

In [None]:
recall_sort_idx = np.argsort(-mean_recall, kind='mergesort')

In [None]:
recall_sort_idx

In [None]:
# Category names ordered from highest to lowest mean recall.
# (Iterates the sorted indices directly; the old `i = 0` initializer was a dead store.)
for cat_idx in recall_sort_idx:
    print(class_name[cat_idx])