In [None]:
#################### import all libraries and initializations ############
import sys
import os
sys.path.append(os.path.abspath("../common"))
import numpy as np 
import time
import math
from PIL import Image
import cv2
from datetime import datetime
from pynq import Xlnk
from pynq import Overlay
import pynq
import struct
from multiprocessing import Process, Pipe, Queue, Event, Manager, Value, Lock
import dac_sdc



# ---- Runtime bring-up ------------------------------------------------------
print('\n**** Running iSmart ****')
# Reset the Xlnk contiguous-memory allocator before allocating anything
# (see the PYNQ Xlnk documentation for the exact scope of xlnk_reset()).
xlnk = Xlnk()
xlnk.xlnk_reset()

team_name = 'iSmart'
team = dac_sdc.Team(team_name, batch_size=100)

# ---- Contiguous buffers for weights and off-chip data -----------------------
# Structured dtypes made of 64 / 32 unsigned bytes ('B'), i.e. one array
# element is 512 / 256 bits wide.
bus512 = ','.join(['B'] * 64)
dt_512 = np.dtype(bus512)

bus256 = ','.join(['B'] * 32)
dt_256 = np.dtype(bus256)

# Input frame: 3 channels x 324 rows x 644 columns (162*2 x 322*2).
img = xlnk.cma_array(shape=(3, 324, 644), dtype=np.int32)
conv_weight_1x1_all = xlnk.cma_array(shape=(413, 32), dtype=dt_512)
conv_weight_3x3_all = xlnk.cma_array(shape=(64, 3, 3), dtype=dt_512)
bias_all = xlnk.cma_array(shape=106, dtype=dt_512)
# 524288 * 2 elements of 256 bits each.
DDR_all = xlnk.cma_array(shape=1048576, dtype=dt_256)
# Per-frame outputs: a 4x5 float32 array and a 4x3 int32 array.
predict_boxes = xlnk.cma_array(shape=(4, 5), dtype=np.float32)
constant = xlnk.cma_array(shape=(4, 3), dtype=np.int32)

print("Allocating memory done")


########### Load parameters from SD card to DDR
# iSmart.bin is read as one flat array of dt_512 records and then sliced, in
# file order, into the 1x1 weights, the 3x3 weights and the biases.  `idx` is
# the running record offset into `params`; each slice is reshaped to the shape
# of its destination buffer before being copied in.
# NOTE(review): if the file holds fewer records than the three buffers need,
# the slice comes back short and the reshape raises ValueError.
params = np.fromfile(str(team.team_dir) + "/iSmart.bin", dtype=dt_512)
idx = 0
np.copyto(conv_weight_1x1_all, params[idx:idx+conv_weight_1x1_all.size].reshape(conv_weight_1x1_all.shape))
idx += conv_weight_1x1_all.size
np.copyto(conv_weight_3x3_all, params[idx:idx+conv_weight_3x3_all.size].reshape(conv_weight_3x3_all.shape))
idx += conv_weight_3x3_all.size
np.copyto(bias_all, params[idx:idx+bias_all.size].reshape(bias_all.shape))
print("Parameters loading done")


################### Download the overlay
# The bitstream path is supplied by the dac_sdc Team helper; constructing the
# Overlay object loads it (see the PYNQ Overlay documentation).
overlay = Overlay(team.get_bitstream_path())
print("Bitstream loaded")

################## Utility functions 
def stitch(image_queue, name_queue, global_img_id):
    """Pre-process the first part of every batch into stitched 2x2 frames.

    For each batch returned by ``team.get_next_batch()`` this worker takes
    the images at indices ``0 .. int((len(batch) - 4) / 2) - 1`` in groups of
    four, resizes each to 320x160 and pastes them into a 644x324 grey canvas
    (1-pixel border, 2-pixel gutter between tiles).  The canvas is converted
    to a channel-first array of shape (3, 324, 644) and queued for the
    accelerator loop.  The remaining images of the batch are handled by the
    sibling worker ``stitch2``.

    With the batch size of 100 configured at the top of this file, this
    worker produces 12 frames (images 0..47) and ``stitch2`` produces 13
    (images 48..99).
    NOTE(review): the split arithmetic is tuned to that batch size; other
    batch lengths may index past the end of ``batch`` -- confirm before
    changing ``batch_size``.

    Ordering: frames are pushed only after acquiring the module-level
    ``lock1``; afterwards ``lock2`` is released so that ``stitch2`` may push
    its share.  The two workers therefore alternate, this one first.

    Args:
        image_queue: queue receiving the stitched (3, 324, 644) arrays.
        name_queue: unused; kept for interface compatibility.
        global_img_id: unused; kept for interface compatibility.

    Relies on the module-level names ``team``, ``lock1`` and ``lock2``.
    """
    # Top-left paste position of tile 0..3 inside the canvas.
    tile_origins = ((1, 1), (323, 1), (1, 163), (323, 163))

    # The canvas is reused for every frame: the four tiles always cover the
    # same pixels, so each new group fully overwrites the previous one.
    blank = Image.new('RGB', (644, 324), (127, 127, 127))
    while True:
        batch = team.get_next_batch()
        if batch is None:
            break

        local_queue = []
        for i in range(0, int((len(batch) - 4) / 2), 4):
            for offset, origin in enumerate(tile_origins):
                image = Image.open(batch[i + offset])
                image = image.convert('RGB')
                image = image.resize((320, 160))
                blank.paste(image, origin)

            # HWC canvas -> CHW array; converting the PIL image yields a
            # fresh array, so reusing `blank` does not alias queued frames.
            image_stitched = np.transpose(blank, (2, 0, 1))
            local_queue.append(image_stitched)

        # acquire() blocks until the lock is free, and put() blocks by
        # itself if the queue is bounded and full, so no polling is needed.
        # The lock is handed to the sibling worker rather than released
        # here, hence no `with` statement.
        lock1.acquire()
        for ele in local_queue:
            image_queue.put(ele)
        lock2.release()
    
    
def stitch2(image_queue, name_queue, global_img_id):
    """Pre-process the second part of every batch into stitched 2x2 frames.

    Counterpart of ``stitch``: for each batch returned by
    ``team.get_next_batch()`` this worker takes the images from index
    ``int((len(batch) - 4) / 2)`` to the end of the batch in groups of four,
    resizes each to 320x160 and pastes them into a 644x324 grey canvas
    (1-pixel border, 2-pixel gutter between tiles).  The canvas is converted
    to a channel-first array of shape (3, 324, 644) and queued for the
    accelerator loop.

    With the batch size of 100 configured at the top of this file, this
    worker produces 13 frames (images 48..99) and ``stitch`` produces the
    first 12 (images 0..47).
    NOTE(review): the split arithmetic is tuned to that batch size; other
    batch lengths may index past the end of ``batch`` -- confirm before
    changing ``batch_size``.

    Ordering: frames are pushed only after acquiring the module-level
    ``lock2`` (held by the main process at start-up and released by
    ``stitch`` once it has pushed its share); afterwards ``lock1`` is
    released so that ``stitch`` may push its part of the next batch.

    Args:
        image_queue: queue receiving the stitched (3, 324, 644) arrays.
        name_queue: unused; kept for interface compatibility.
        global_img_id: unused; kept for interface compatibility.

    Relies on the module-level names ``team``, ``lock1`` and ``lock2``.
    """
    # Top-left paste position of tile 0..3 inside the canvas.
    tile_origins = ((1, 1), (323, 1), (1, 163), (323, 163))

    # The canvas is reused for every frame: the four tiles always cover the
    # same pixels, so each new group fully overwrites the previous one.
    blank = Image.new('RGB', (644, 324), (127, 127, 127))
    while True:
        batch = team.get_next_batch()
        if batch is None:
            break

        local_queue = []
        for i in range(int((len(batch) - 4) / 2), len(batch), 4):
            for offset, origin in enumerate(tile_origins):
                image = Image.open(batch[i + offset])
                image = image.convert('RGB')
                image = image.resize((320, 160))
                blank.paste(image, origin)

            # HWC canvas -> CHW array; converting the PIL image yields a
            # fresh array, so reusing `blank` does not alias queued frames.
            image_stitched = np.transpose(blank, (2, 0, 1))
            local_queue.append(image_stitched)

        # acquire() blocks until the lock is free, and put() blocks by
        # itself if the queue is bounded and full, so no polling is needed.
        # The lock is handed to the sibling worker rather than released
        # here, hence no `with` statement.
        lock2.acquire()
        for ele in local_queue:
            image_queue.put(ele)
        lock1.release()

            
def compute_bounding_box(predicts, boxes, output_queue):
    """Decode raw accelerator outputs into pixel rectangles.

    For every stitched frame the main loop puts ``[raw_boxes, index]`` on
    ``output_queue``: a 4x5 float32 array and a 4x3 int32 array, one row per
    image of the frame.  Each row is decoded as

        x = (sigmoid(row[0]) + index[1]) / 40
        y = (sigmoid(row[1]) + index[2]) / 20
        w = exp(row[2]) * anchor_w / 40
        h = exp(row[3]) * anchor_h / 20

    where the anchor pair is chosen by ``index[0]`` (0 selects the first
    pair, anything else the second).  The centre/size box is then turned
    into corner coordinates scaled by 640 x 360 and appended to ``boxes`` as
    ``[x1, x2, y1, y2]``.
    NOTE(review): 40 x 20 is presumably the detector's output grid and
    640 x 360 the original image resolution -- confirm against the model.

    The function iterates over ``team.get_next_batch()`` itself so that it
    consumes exactly one queue entry per group of four images, mirroring the
    main loop.

    Args:
        predicts: unused; kept for interface compatibility.
        boxes: list-like object receiving one ``[x1, x2, y1, y2]`` entry per
            image.  The original code ignored this parameter and appended to
            the global ``result_rectangle``; the caller passes that very
            object here, so results land in the same place.
        output_queue: queue delivering ``[raw_boxes, index]`` pairs.

    Relies on the module-level name ``team``.
    """
    grid_w, grid_h = 40, 20
    out_w, out_h = 640, 360

    # Working copies.  Intermediate results are deliberately written back
    # into the float32 array so rounding happens at the same points as in
    # the original implementation.
    predict_boxes = np.empty([4, 5], dtype=np.float32)
    constant = np.empty([4, 3], dtype=np.int32)
    # Two (width, height) anchor pairs, stored flat.
    box = [1.4940052559648322, 2.3598481287086823, 4.0113013115312155, 5.760873975661669]

    def sigmoid(value):
        return 1.0 / (1.0 + math.exp(-value))

    while True:
        batch = team.get_next_batch()
        if batch is None:
            break

        for _ in range(0, len(batch), 4):
            # get() blocks until the main loop has produced a result, so
            # there is no need to spin on empty().
            outputs = output_queue.get()
            outputs_boxes = outputs[0]
            outputs_index = outputs[1]
            np.copyto(predict_boxes, np.array(outputs_boxes))
            np.copyto(constant, np.array(outputs_index))

            for idx in range(0, 4):
                row = predict_boxes[idx]      # view: writes go to the array
                info = constant[idx]

                row[0] = sigmoid(row[0]) + info[1]
                row[1] = sigmoid(row[1]) + info[2]

                if info[0] == 0:
                    anchor_w, anchor_h = box[0], box[1]
                else:
                    anchor_w, anchor_h = box[2], box[3]
                row[2] = math.exp(row[2]) * anchor_w
                row[3] = math.exp(row[3]) * anchor_h
                # Computed as in the original; not used for the rectangle.
                row[4] = sigmoid(row[4])

                # Normalise to the [0, 1] range of the grid.
                row[0] = row[0] / grid_w
                row[1] = row[1] / grid_h
                row[2] = row[2] / grid_w
                row[3] = row[3] / grid_h

                # Centre/size -> corner coordinates in output pixels.
                x1 = int(round((row[0] - row[2]/2.0) * out_w))
                y1 = int(round((row[1] - row[3]/2.0) * out_h))
                x2 = int(round((row[0] + row[2]/2.0) * out_w))
                y2 = int(round((row[1] + row[3]/2.0) * out_h))
                boxes.append([x1, x2, y1, y2])


In [None]:
###########################################################
################ MAIN PART OF DETECTION ###################
###########################################################

# Handle to the accelerator IP inside the loaded overlay.
SkyNet = overlay.SkyNet_0 
# Program the physical addresses of the contiguous buffers into the IP's
# registers.
# NOTE(review): offsets 0x10..0x40 are presumably the argument registers of
# the accelerator's control interface and must match the bitstream -- confirm
# against the generated driver header.
SkyNet.write(0x10, img.physical_address)
SkyNet.write(0x18, conv_weight_1x1_all.physical_address)
SkyNet.write(0x20, conv_weight_3x3_all.physical_address)
SkyNet.write(0x28, bias_all.physical_address)
SkyNet.write(0x30, DDR_all.physical_address)
SkyNet.write(0x38, predict_boxes.physical_address)
SkyNet.write(0x40, constant.physical_address)

# Power measurement: record the power reading of the '5V' rail.
rails = pynq.get_rails()
recorder = pynq.DataRecorder(rails['5V'].power)

################# Declare New Process ##############
# Manager-backed containers shared between the main process and the workers.
mgr = Manager()
result_rectangle = mgr.list()   # final [x1, x2, y1, y2] entries, filled by p3
predicts = mgr.list()           # passed to p3 but not used by it
output_queue = mgr.Queue()      # main loop -> p3: raw accelerator outputs
name_queue = mgr.Queue()        # passed to p1/p2 but not used by them
image_queue = mgr.Queue()       # p1/p2 -> main loop: stitched input frames
global_img_id = Value('i', 0)   # passed to p1/p2 but not used by them

# p1/p2 pre-process the two parts of each batch, p3 decodes the outputs.
p1 = Process(target=stitch, args=(image_queue, name_queue, global_img_id))
p2 = Process(target=stitch2, args=(image_queue, name_queue, global_img_id))
p3 = Process(target=compute_bounding_box, args=(predicts, result_rectangle, output_queue))

# Hand-over locks used by stitch/stitch2 to alternate their queue pushes.
# They are read as module-level globals inside the workers, so they must exist
# before the processes are started.
lock1 = Lock()
lock2 = Lock()
################### Start to detect ################
# Plain (non-CMA) staging arrays used to ship each result through the queue.
output_boxes = np.empty([4, 5], dtype=np.float32)
output_index = np.empty([4, 3], dtype=np.int32)

# Hold lock2 before starting the workers so that stitch (which waits on
# lock1) pushes first and stitch2 only after stitch releases lock2.
lock2.acquire()
p1.start()
p2.start()
p3.start()
batch_id = 0

print("\n**** Start to detect")
start = time.time()
# Record rail power (interval argument 0.05) while the detection loop runs.
with recorder.record(0.05): 
    while True:
        batch = team.get_next_batch()
        if batch is None:
            break
        # Progress message every fifth batch.
        if batch_id % 5 == 0:
            print("Processing %d.jpg to %d.jpg" % (batch_id*100, (batch_id+5)*100-1))
        batch_id = batch_id + 1

        # One accelerator run per stitched frame, i.e. per four images.
        for i in range(0, len(batch), 4):
            # get() blocks until a worker has delivered the next frame.
            # Polling empty() first would only add a manager round trip per
            # iteration and keep this process spinning.
            preprocessed_img = image_queue.get()
            np.copyto(img, np.array(preprocessed_img))

            # Write 1 to the register at 0x00 to launch the accelerator,
            # then poll that register until it no longer reads 1.
            SkyNet.write(0x00, 1)
            isready = SkyNet.read(0x00)
            while isready == 1:
                isready = SkyNet.read(0x00)

            # Copy the results out of the contiguous buffers into plain
            # arrays and hand them to the decoding process.
            outputs = []
            np.copyto(output_boxes, predict_boxes)
            np.copyto(output_index, constant)
            outputs.append(output_boxes)
            outputs.append(output_index)
            output_queue.put(outputs)


# Wait for the workers; the measured time therefore includes the decoding of
# the last results by p3.
p1.join()
p2.join()
p3.join()
print("\n**** Detection finished\n")

end = time.time()
total_time = end - start
print('Total time: ' + str(total_time) + ' s')

#FPS = 1000 / total_time
#print('FPS: ' + str(FPS))

# Energy = mean recorded power * elapsed time.
total_energy = recorder.frame["5V_power"].mean() * total_time
print('Total energy: ' + str(total_energy) + ' J')

In [None]:

################ record the results and write to XML
# Optional debug dump (disabled): writes one "x1 x2 y1 y2" line per detected
# rectangle to a text file.
# f_out = open('./bbox_PL_1000_sample.txt', 'w')
# cnt = 0
# for box in result_rectangle:
#     x1 = box[0]
#     x2 = box[1]
#     y1 = box[2]
#     y2 = box[3]
#     coord = str(x1) + ' ' + str(x2) + ' ' + str(y1) + ' ' + str(y2)
#     cnt = cnt + 1
#     f_out.write(coord + '\n')
# f_out.close()
# print("\nAll results stored in bbox_PL_1000_sample.txt")

# Hand the rectangles together with the measured runtime and energy to the
# dac_sdc helper, which writes the result file.
team.save_results_xml(result_rectangle, total_time, total_energy)
print("XML results written successfully.")

############## clean up #############
# Reset the Xlnk allocator again now that the buffers are no longer needed.
xlnk.xlnk_reset()  