In [1]:
! mkdir output

mkdir: cannot create directory ‘output’: File exists


In [2]:
import os
import sys
import glob
import cv2
import numpy as np
import time
import xml.etree.ElementTree as ET
from matplotlib import pyplot as plt
from PIL import Image
from collections import Counter

from driver import io_shape_dict
from driver_base import FINNExampleOverlay

In [3]:
# download and unzip
#! pip3 install gdown
#! gdown https://drive.google.com/uc?id=1ZE0XgaVlYMl9Y7QP2-ggyGICxL0T5p-T
#! unzip voc2007.zip
#! unzip pynq_deployment_0nptxqpx-20221227T055358Z-001.zip 

In [4]:
def intersection_over_union(boxes_preds, boxes_labels):
    """Compute the intersection-over-union of boxes given in midpoint format.

    Both arguments are NumPy arrays whose last axis holds
    ``(x_center, y_center, width, height)``; any leading axes are broadcast
    against each other.

    Returns:
        An array with a trailing axis of length 1 holding
        ``intersection / (area_pred + area_label - intersection + 1e-6)``.
    """

    def to_corners(boxes):
        # Slices (not integer indices) are used so the trailing axis is kept.
        left = boxes[..., 0:1] - boxes[..., 2:3] / 2
        top = boxes[..., 1:2] - boxes[..., 3:4] / 2
        right = boxes[..., 0:1] + boxes[..., 2:3] / 2
        bottom = boxes[..., 1:2] + boxes[..., 3:4] / 2
        return left, top, right, bottom

    pred_left, pred_top, pred_right, pred_bottom = to_corners(boxes_preds)
    lab_left, lab_top, lab_right, lab_bottom = to_corners(boxes_labels)

    # Extent of the overlap rectangle; clipped to 0 when the boxes are disjoint.
    overlap_w = (np.minimum(pred_right, lab_right) - np.maximum(pred_left, lab_left)).clip(0)
    overlap_h = (np.minimum(pred_bottom, lab_bottom) - np.maximum(pred_top, lab_top)).clip(0)
    intersection = overlap_w * overlap_h

    pred_area = abs((pred_right - pred_left) * (pred_bottom - pred_top))
    label_area = abs((lab_right - lab_left) * (lab_bottom - lab_top))

    # The small constant keeps the division defined for zero-area boxes.
    union = pred_area + label_area - intersection + 1e-6
    return intersection / union

def non_max_suppression(bboxes, iou_threshold, threshold):
    """Filter boxes by confidence, then apply per-class non-maximum suppression.

    Args:
        bboxes: list of boxes, each ``[class, confidence, x, y, w, h]``.
        iou_threshold: a box is dropped when it has the same class as an
            already kept box and their IoU is not below this value.
        threshold: boxes whose confidence is not above this value are
            discarded before suppression.

    Returns:
        The kept boxes, ordered by decreasing confidence.
    """
    candidates = sorted(
        (box for box in bboxes if box[1] > threshold),
        key=lambda box: box[1],
        reverse=True,
    )
    kept = []

    while candidates:
        best, remaining = candidates[0], candidates[1:]
        kept.append(best)

        survivors = []
        for other in remaining:
            # Boxes of a different class never suppress each other.
            if other[0] != best[0]:
                survivors.append(other)
                continue
            overlap = intersection_over_union(np.array(best[2:]), np.array(other[2:]))
            if overlap < iou_threshold:
                survivors.append(other)
        candidates = survivors

    return kept

In [5]:
def convert_cellboxes(predictions, S=3, C=20):
    """Decode a grid of per-cell predictions into image-relative boxes.

    ``predictions`` is reshaped to ``(batch, S, S, C + 10)``. Along the last
    axis the first ``C`` entries are class scores, followed by two candidate
    boxes, each stored as ``(confidence, x, y, w, h)``.

    Returns:
        An array of shape ``(batch, S, S, 6)`` holding, per cell,
        ``(class_index, confidence, x, y, w, h)`` for the more confident of
        the two candidates, with the geometry scaled by ``1 / S`` and the
        cell's column/row index added to x/y.
    """
    n_images = predictions.shape[0]
    grid = predictions.reshape(n_images, S, S, C + 10)

    conf_first = grid[..., C]
    conf_second = grid[..., C + 5]
    coords_first = grid[..., C + 1:C + 5]
    coords_second = grid[..., C + 6:C + 10]

    # 0 selects the first candidate, 1 the second; ties go to the first.
    both_conf = np.concatenate(
        (conf_first[np.newaxis], conf_second[np.newaxis]), axis=0
    )
    use_second = both_conf.argmax(0)[..., np.newaxis]
    chosen = coords_first * (1 - use_second) + use_second * coords_second

    # Column index varies along axis 2, row index along axis 1; broadcasting
    # expands both against the (batch, S, S, 1) coordinate slices.
    col_index = np.arange(S)[np.newaxis, np.newaxis, :, np.newaxis]
    row_index = np.arange(S)[np.newaxis, :, np.newaxis, np.newaxis]

    scale = 1 / S
    x_center = scale * (chosen[..., :1] + col_index)
    y_center = scale * (chosen[..., 1:2] + row_index)
    width_height = scale * chosen[..., 2:4]

    class_index = grid[..., :C].argmax(-1)[..., np.newaxis]
    confidence = np.maximum(conf_first, conf_second)[..., np.newaxis]

    return np.concatenate(
        (class_index, confidence, x_center, y_center, width_height), axis=-1
    )


def cellboxes_to_boxes(out, S=3):
    """Decode a batch of grid predictions into plain Python box lists.

    Args:
        out: array whose first axis is the batch; passed to
            ``convert_cellboxes`` for decoding.
        S: grid size (cells per side).

    Returns:
        One list per batch element containing ``S * S`` boxes, each a list
        of Python scalars ``[class, confidence, x, y, w, h]``.
    """
    # Forward S so decoding uses the same grid size as the flattening below;
    # previously convert_cellboxes always ran with its default of 3.
    converted_pred = convert_cellboxes(out, S=S).reshape(out.shape[0], S * S, -1)
    # tolist() yields nested lists of native Python scalars.
    return converted_pred.tolist()

In [6]:
def blender(predbboxes, gtbboxes, filenames, widths, heights):
    """Draw ground-truth and predicted boxes on an image and save the result.

    Args:
        predbboxes: predicted boxes, each ``[class, confidence, x, y, w, h]``
            with midpoint-format geometry expressed as fractions of the
            image width/height.
        gtbboxes: ground-truth boxes in the same layout.
        filenames: path of the image to read; the annotated copy is written
            to ``./output/<filenames>``.
        widths: image width in pixels, used to scale x and w.
        heights: image height in pixels, used to scale y and h.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    label_dictionary = {0:'aeroplane', 1:'bicycle', 2:'bird', 3:'boat', 4:'bottle', 5:'bus', 6:'car', 7:'cat', 8:'chair', 9:'cow', 10:'diningtable', 11:'dog',
        12:'horse', 13:'motorbike', 14:'person', 15:'pottedplant', 16:'sheep', 17:'sofa', 18:'train', 19:'tvmonitor'}

    cv2image = cv2.imread(filenames)
    if cv2image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of inside a drawing call.
        raise FileNotFoundError(f"could not read image '{filenames}'")

    def draw_boxes(bboxes, colour):
        """Draw each box and its class name onto cv2image in the given colour."""
        for bbox in bboxes:
            bbox_width = float(bbox[4]) * widths
            bbox_height = float(bbox[5]) * heights
            center_x = float(bbox[2]) * widths
            center_y = float(bbox[3]) * heights
            min_x, min_y = center_x - (bbox_width / 2), center_y - (bbox_height / 2)
            max_x, max_y = center_x + (bbox_width / 2), center_y + (bbox_height / 2)
            # The label is placed slightly above the top-left corner of the box.
            cv2.putText(cv2image, label_dictionary[int(bbox[0])], (int(min_x), int(min_y - 6)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, colour, 1, cv2.LINE_AA)
            cv2.rectangle(cv2image, (int(min_x), int(min_y)), (int(max_x), int(max_y)), colour, 2)

    # Ground truth first, then predictions drawn on top in a different colour.
    draw_boxes(gtbboxes, (255, 255, 0))
    draw_boxes(predbboxes, (255, 0, 255))

    output_path = f'./output/{filenames}'
    # Make sure the destination directory exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    cv2.imwrite(output_path, cv2image)

In [7]:
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """Compute the mean average precision over all classes with ground truth.

    Args:
        pred_boxes: list of predictions, each
            ``[image_idx, class, confidence, x, y, w, h]``.
        true_boxes: list of ground-truth boxes in the same layout.
        iou_threshold: a prediction counts as a true positive when its best
            IoU with a not-yet-matched ground-truth box of the same image and
            class exceeds this value.
        box_format: accepted for interface compatibility only; it is not
            used, boxes are always handed to ``intersection_over_union``.
        num_classes: number of class indices (``0 .. num_classes - 1``).

    Returns:
        The mean of the per-class average precisions, taken over the classes
        that have at least one ground-truth box; ``0.0`` when no class has
        any ground truth.
    """
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; use whichever exists.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # used for numerical stability later on
    epsilon = 1e-6

    # list storing all AP for respective classes
    average_precisions = []

    for c in range(num_classes):
        detections = [detection for detection in pred_boxes if detection[1] == c]
        ground_truths = [true_box for true_box in true_boxes if true_box[1] == c]

        total_true_bboxes = len(ground_truths)
        if total_true_bboxes == 0:
            continue

        # Group this class's ground truth by image once, instead of scanning
        # the whole list again for every detection.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # Per image, one flag per ground-truth box: 1 once it has been matched.
        matched = {image: np.zeros(len(gts)) for image, gts in gts_by_image.items()}

        # Highest-confidence detections claim ground-truth boxes first.
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = np.zeros(len(detections))
        FP = np.zeros(len(detections))

        for detection_idx, detection in enumerate(detections):
            image_gts = gts_by_image.get(detection[0], [])

            best_iou = 0
            best_gt_idx = None
            for idx, gt in enumerate(image_gts):
                iou = intersection_over_union(np.array(detection[3:]), np.array(gt[3:]))
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_gt_idx is not None and best_iou > iou_threshold:
                # only detect ground truth detection once
                if matched[detection[0]][best_gt_idx] == 0:
                    # true positive and add this bounding box to seen
                    TP[detection_idx] = 1
                    matched[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1
            else:
                # no overlapping ground truth, or IoU too low: false positive
                FP[detection_idx] = 1

        TP_cumsum = np.cumsum(TP, axis=0)
        FP_cumsum = np.cumsum(FP, axis=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = np.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        # Prepend the (recall=0, precision=1) point so the curve starts on the axis.
        precisions = np.concatenate((np.array([1]), precisions))
        recalls = np.concatenate((np.array([0]), recalls))
        # Area under the precision-recall curve (trapezoidal rule).
        average_precisions.append(integrate(precisions, recalls))

    if not average_precisions:
        # No class had ground truth: avoid dividing by zero.
        return 0.0

    return sum(average_precisions) / len(average_precisions)

In [8]:
# Detection-head layout: the label/prediction tensors built below have shape
# (S, S, C + 5 * B).
S = 3   # grid cells per image side
B = 2   # candidate boxes per cell
C = 20  # number of classes (matches label_dictionary below)
iou_threshold = 0.5  # IoU cutoff passed to non_max_suppression
threshold = 0.4      # confidence cutoff for predictions and ground-truth cells

# Leftover from reading the list of test files from a text file; the string
# literal below is never assigned or evaluated further, so it has no effect.
#test_file = []
'''for line in f:
    test_file.append(line.replace('\n', ''))
f.close()'''
# Each entry is a path without extension; ".xml" is appended in the loop below.
test_files = []
test_files.append('./000355')

# Class name -> class index.
label_dictionary = {'aeroplane':0, 'bicycle':1, 'bird':2, 'boat':3, 'bottle':4, 'bus':5, 'car':6, 'cat':7, 'chair':8, 'cow':9, 'diningtable':10, 'dog':11,
                    'horse':12, 'motorbike':13, 'person':14, 'pottedplant':15, 'sheep':16, 'sofa':17, 'train':18, 'tvmonitor':19}
# Accumulators filled by the loop below; each entry is [idx] + box.
all_pred_boxes = []
all_true_boxes = []
idx = 0  # running image index prepended to every stored box

# Accelerator driver used for inference in the loop below (driver.execute).
# NOTE(review): presumably this loads the bitfile onto the FPGA on
# construction - confirm against driver_base.
driver = FINNExampleOverlay(
    bitfile_name="./bitfile/finn-accel.bit",
    platform="zynq-iodma",
    io_shape_dict=io_shape_dict,
    batch_size=1,
    runtime_weight_dir="runtime_weights/",
)
# Affine correction applied to the raw accelerator output.
# NOTE(review): presumably the output de-quantisation scale/offset of the
# exported model - confirm against the model export.
output_scale = 0.0034094545990228653
output_offset = 0.2782003879547119

for number, test_file in enumerate(test_files):

    # --- Read the annotation and the image it refers to -------------------
    filexml = str(test_file) + ".xml"

    boxes = []
    tree = ET.parse(filexml)
    root = tree.getroot()
    filename = root.find('filename').text

    img_org = cv2.imread(filename)
    if img_org is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(
            f"could not read image '{filename}' referenced by '{filexml}'"
        )
    h, w, _ = img_org.shape
    # The accelerator is fed a 48x48 RGB image (OpenCV loads images as BGR).
    img = cv2.resize(img_org, (48, 48), interpolation=cv2.INTER_AREA)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    size_node = root.find('size')
    img_width = int(size_node.find('width').text)
    img_height = int(size_node.find('height').text)

    # --- Ground truth: corner boxes in pixels -> midpoint boxes in [0, 1] --
    for member in root.findall('object'):

        name = member.find('name').text
        label = label_dictionary[name]

        bndbox = member.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        xmax = int(bndbox.find('xmax').text)
        ymin = int(bndbox.find('ymin').text)
        ymax = int(bndbox.find('ymax').text)

        centerx = ((xmax + xmin) / 2) / img_width
        centery = ((ymax + ymin) / 2) / img_height
        boxwidth = (xmax - xmin) / img_width
        boxheight = (ymax - ymin) / img_height

        boxes.append([label, centerx, centery, boxwidth, boxheight])

    boxes = np.array(boxes)

    # --- Encode the ground truth in the same grid layout as the network ---
    label_matrix = np.zeros((S, S, C + 5 * B))

    for box in boxes:
        class_label, x, y, width, height = box.tolist()
        class_label = int(class_label)

        # Row i / column j of the cell containing the box centre. Clamp so a
        # centre lying exactly on the right/bottom image edge (x or y == 1.0)
        # falls into the last cell instead of indexing out of bounds.
        i, j = min(int(S * y), S - 1), min(int(S * x), S - 1)
        x_cell, y_cell = S * x - j, S * y - i

        width_cell, height_cell = (width * S, height * S)

        # Only the first object that lands in a cell is recorded.
        if label_matrix[i, j, C] == 0:
            label_matrix[i, j, C] = 1

            box_coordinates = np.array([x_cell, y_cell, width_cell, height_cell])

            # First box slot of the cell: indices C+1 .. C+4.
            label_matrix[i, j, C + 1:C + 5] = box_coordinates

            label_matrix[i, j, class_label] = 1
    label_matrix = np.expand_dims(label_matrix, 0)

    # --- Run the accelerator and decode its output ------------------------
    driver_in = np.expand_dims(img, 0)
    output = driver.execute(driver_in)
    output = output_scale * output + output_offset
    predictions = output.reshape(1, S, S, C + 5 * B)

    true_bboxes = cellboxes_to_boxes(label_matrix, S=S)
    pred_bboxes = cellboxes_to_boxes(predictions, S=S)

    nms_pred_bboxes = non_max_suppression(pred_bboxes[0], iou_threshold=iou_threshold, threshold=threshold)
    # Empty grid cells decode to confidence 0 and are filtered out here.
    gtbboxes = [box for box in true_bboxes[0] if box[1] > threshold]

    blender(nms_pred_bboxes, gtbboxes, filename, img_width, img_height)

    # --- Accumulate boxes, tagged with the running image index ------------
    for nms_box in nms_pred_bboxes:
        all_pred_boxes.append([idx] + nms_box)

    for box in gtbboxes:
        all_true_boxes.append([idx] + box)

    idx += 1
    

