In [1]:
# https://github.com/amdegroot/ssd.pytorch/blob/master/eval.py
import os
import sys
module_path = os.path.abspath(os.path.join('../../../../ssd.pytorch'))
if module_path not in sys.path:
    sys.path.append(module_path)

from __future__ import print_function
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from torch.autograd import Variable
from matplotlib import pyplot as plt

%matplotlib inline


import cv2
import pickle
import re
import numpy as np

from ssd import build_ssd

In [2]:
%%javascript
// Replace the output area's _should_scroll hook with one that always returns
// false. Presumably this stops the classic notebook UI from collapsing long
// cell output (e.g. a per-folder progress log) into a scrollable box —
// NOTE(review): relies on the private `IPython.OutputArea` API; confirm it
// exists in the front-end being used.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
# calculate IOU
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Parameters
    ----------
    boxA, boxB : sequence of 4 numbers
        Boxes given as ``(xmin, ymin, xmax, ymax)``.

    Returns
    -------
    float
        ``intersection / union``; ``0.0`` when the boxes do not overlap or
        when the union is not positive (degenerate boxes).

    Notes
    -----
    Widths and heights use the inclusive convention ``max - min + 1``.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each side at zero. Without the clamp, two boxes separated on both
    # axes give two negative sides whose product is a bogus positive area,
    # and boxes separated on one axis give a negative area.
    interWidth = max(0, xB - xA + 1)
    interHeight = max(0, yB - yA + 1)
    interArea = interWidth * interHeight

    # areas of the two input rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # union = sum of both areas minus the part counted twice
    unionArea = float(boxAArea + boxBArea - interArea)
    if unionArea <= 0:
        # degenerate input (e.g. inverted boxes); avoid dividing by zero
        return 0.0

    return interArea / unionArea

# calculate TP,FP,FN from predictions and groundtruth
def calculate_metrics(ground_truths, predictions, iou_threshold = 0.5):
    '''
    Count true positives, false positives and false negatives for one image.

    Each prediction is matched, in list order, against the first ground truth
    that is still unmatched and whose IoU with it is strictly greater than
    iou_threshold.

    NOTE:
    if in one image, we have multiple proposals for a particular object that
    is considered truly classified, we only count one proposal as TP, and the
    others as FP.

    NOTE: matching is greedy and follows the order of `predictions`; the
    caller may want to sort predictions by confidence first.

    ground_truths : list of ground_truth in form of tuples (xmin,ymin,xmax,ymax)
    predictions : list of prediction in form of tuples (xmin,ymin,xmax,ymax)
    iou_threshold : minimum IoU (exclusive) for a prediction to match

    returns : tuple (TP, FP, FN) where
        TP = predictions matched to a ground truth,
        FP = predictions left unmatched,
        FN = ground truths left unmatched.
    '''
    TP = 0
    FP = 0
    # gt_matched[i] is True once ground_truths[i] has been claimed
    gt_matched = [False] * len(ground_truths)

    for pred in predictions:
        for index, gt in enumerate(ground_truths):
            if gt_matched[index]:
                # already claimed by an earlier prediction
                continue
            if bb_intersection_over_union(pred, gt) > iou_threshold:
                gt_matched[index] = True
                TP = TP + 1
                break
        else:
            # no free ground truth overlapped enough with this prediction
            FP = FP + 1

    # A false negative is a ground truth that no prediction matched. The
    # previous len(ground_truths) - len(predictions) under-counted whenever
    # predictions existed but did not match.
    FN = gt_matched.count(False)

    return (TP, FP, FN)


In [4]:
def calculate_metrics_for_image(model, img_file, ground_truth, cuda=False, person_class_index=15, conf_threshold=0.5):
    '''
    Run the detector on one image and score its person detections.

    Model will give all detections. We are only interested in person,
    person class index : 15

    model : callable detector; its output's `.data` is indexed as
        [batch, class, detection, 5] with rows
        [probability, xmin, ymin, xmax, ymax]
        (box coordinates are assumed to be normalised to [0, 1], since they
        are multiplied by the image size below - TODO confirm)
    img_file : path of the image to read with OpenCV
    ground_truth : iterable of annotations; the first four entries of each
        are used as (xmin, ymin, xmax, ymax)
    cuda : move the input tensor to the GPU before the forward pass
    person_class_index : index of the class to evaluate
    conf_threshold : keep detections whose probability is strictly greater
        than this value

    returns : tuple (TP, FP, FN) from calculate_metrics
    raises : IOError if the image cannot be read
    '''
    # read image; cv2.imread returns None instead of raising on failure
    img = cv2.imread(img_file, cv2.IMREAD_COLOR)
    if img is None:
        raise IOError('Could not read image file: {0}'.format(img_file))
    height, width = img.shape[:2]

    # resize to the network input size and subtract the per-channel mean
    # (applied while the image is still in OpenCV's BGR channel order)
    x = cv2.resize(img, (300, 300)).astype(np.float32)
    x -= (104.0, 117.0, 123.0) # mean of trained model
    x = x[:, :, ::-1].copy()   # reverse the channel axis (BGR -> RGB)
    x = torch.from_numpy(x).permute(2, 0, 1)

    # detections
    xx = Variable(x.unsqueeze(0))
    if cuda:
        xx = xx.cuda()
    detections = model(xx).data

    # Rows of [probability, xmin, ymin, xmax, ymax] for the requested class.
    # Bring them to the CPU so they can be multiplied by the CPU `scale`
    # tensor regardless of where the model produced them.
    person_detections = detections[0, person_class_index, :].cpu()

    # scale back to original size: (width, height, width, height)
    scale = torch.Tensor([width, height, width, height])

    # Iterate over the detection rows (dimension 0). Using size(1) here would
    # be the row width (5) and silently cap the evaluation at 5 detections.
    predictions_list = []
    for i in range(person_detections.size(0)):
        if float(person_detections[i, 0]) > conf_threshold:
            predictions_list.append((person_detections[i, 1:] * scale).numpy())

    TP, FP, FN = calculate_metrics([list(gt[0:4]) for gt in ground_truth], predictions_list)
    return (TP, FP, FN)

In [7]:
cuda = True

# Load pretrained SSD model
net = build_ssd('test', 300, 21)    # initialize SSD
net.load_weights('../../../../ssd.pytorch/weights/ssd300_mAP_77.43_v2.pth')
net.eval()  # inference only: put the module in evaluation mode

if cuda:
    net.cuda()

# initialize metrics (accumulated over every annotated frame)
TP_fin = 0
FP_fin = 0
FN_fin = 0

# full data
# NOTE(review): absolute, machine-specific path - adjust for your environment.
r_folder = '/home/vijin/iith/project/data/mini-drone-data_processed/test'
for file in os.listdir(r_folder):
    folder = os.path.join(r_folder, file)
    if not os.path.isdir(folder):
        continue

    print('Processing {0} ...'.format(file))
    # folder level details: each video folder holds its frames plus a pickled
    # {frame_number: annotations} map named <folder>_ann.obj
    video_file_name = file
    annotation_file_name = os.path.join(folder, '{0}_ann.obj'.format(video_file_name))
    # NOTE: pickle can execute arbitrary code - only load annotation files
    # that come from a trusted source.
    with open(annotation_file_name, 'rb') as annotation_file:
        annotation_map = pickle.load(annotation_file)

    # Iterate through the folder
    for x in os.listdir(folder):
        if '.jpg' not in x:
            continue
        # raw string avoids the invalid-escape warning for \d; the dot before
        # the extension is escaped so it only matches a literal '.'
        frame_number_search = re.search(r'.*_frame_(\d+)\.jpg', x, re.IGNORECASE)
        if not frame_number_search:
            continue
        frame_number = int(frame_number_search.group(1))
        if frame_number not in annotation_map:
            # frame without annotations: not part of the evaluation
            continue

        img_file = os.path.join(folder, x)
        ground_truth = annotation_map[frame_number]
        TP, FP, FN = calculate_metrics_for_image(net, img_file, ground_truth, cuda)
        TP_fin += TP
        FP_fin += FP
        FN_fin += FN


# Precision & Recall
# NOTE: `AP` is the overall precision TP / (TP + FP) at a single operating
# point, not an average precision over a precision-recall curve.
# float() keeps true division on Python 2; the guards avoid ZeroDivisionError
# when there were no detections / no annotations.
AP = float(TP_fin) / (TP_fin + FP_fin) if (TP_fin + FP_fin) > 0 else 0.0
Recall = float(TP_fin) / (TP_fin + FN_fin) if (TP_fin + FN_fin) > 0 else 0.0

print('AP@0.5 : {0}%'.format(AP*100))
print('Recall@0.5 : {0}%'.format(Recall * 100))

Loading weights into state dict...
Finished!
Processing Normal_Follow_Day_Half_1_1_1 ...
Processing BadParking_Static_Day_Half_1_1_1 ...
Processing StealingInside_Follow_Day_Half_1_1_1 ...
Processing StealingPedestrian_Follow_Day_Half_0_3_Rucksack_1 ...
Processing Broken_CloseUp_Day_Half_1_1_2 ...
Processing StealingPedestrian_Static_Day_Half_0_3_2 ...
Processing Crash_Follow_Day_Half_0_2_2 ...
Processing StealingCar_CloseUp_Day_Half_1_3_1 ...
Processing StealingPedestrian_Static_Day_Half_0_3_1 ...
Processing Suspicious_Static_Day_Half_0_2_1 ...
Processing Normal_Static_Day_Half_1_1_1 ...
Processing Normal_Circle_Day_Half_0_7_2 ...
Processing Crash_Follow_Day_Half_0_2_1 ...
Processing StealingCar_CloseUp_Day_Empty_1_m_1 ...
Processing Normal_Circle_Day_Half_0_7_3 ...
Processing Suspicious_Static_Day_Half_0_1_1 ...
Processing Attack_CloseUp_Day_Empty_1_2_1 ...
Processing Normal_Static_Night_Empty_1_3_1 ...
Processing BadParking_Static_Day_Full_1_1_1 ...
Processing Suspicious_Follow_Day_