# 1. Importing all libraries

In [None]:
import sys
import os

sys.path.append(os.path.abspath("../common"))

import math
import time
import numpy as np
from PIL import Image
import concurrent.futures
from matplotlib import pyplot
import cv2
from datetime import datetime

import pynq
from pynq import Xlnk
import dac_sdc
from IPython.display import display
import ctypes

# Register the team with the dac_sdc helper (batch size 4), show the team
# directory, and load the shared library holding the C image loader.
team_name = 'Nobabyknows'
team = dac_sdc.Team(team_name, batch_size=4)
team_dir_str = str(team.team_dir)
print(team_dir_str)
cfuns = ctypes.CDLL(team_dir_str + "/load_image_c/load_image.so")

# 2. Preparing the overlay and weight loading

In [None]:
# Build the PYNQ overlay from the team's bitstream path and keep a handle to
# its HlsNet_0 attribute, which the rest of the notebook drives through
# register reads/writes.
overlay = pynq.Overlay(team.get_bitstream_path())
print("Bitstream loaded")
HlsNet = overlay.HlsNet_0

# 3. Processing Images

In [None]:
# Reset the team's batch counter before processing (presumably so that
# team.get_next_batch() starts again from the first batch -- confirm in dac_sdc).
team.reset_batch_count()

# Load images using the C helper library (load_image.so).
def load_image(image_paths, buff):
    """Fill ``buff`` with the images at ``image_paths`` via the C library.

    Every path is converted to a UTF-8 byte string and packed into a C array
    of ``char *``. That array, a raw pointer to ``buff``'s memory, the number
    of paths and the constants 160, 320 and 4 are passed to
    ``cfuns.load_resize_image`` (the constants are presumably the height,
    width and channel count of each resized image -- confirm against the C
    source).

    Args:
        image_paths: Iterable of path-like objects, one per image.
        buff: Destination array; viewed through ``np.asarray`` and written
            to by the C function.
    """
    # Keep the c_char_p objects in a list so they outlive the C call.
    c_strings = [ctypes.c_char_p(str(p).encode('utf-8')) for p in image_paths]
    path_array_type = ctypes.c_char_p * len(c_strings)
    paths_c = path_array_type(*c_strings)

    target = np.asarray(buff)
    dataptr = target.ctypes.data_as(ctypes.c_char_p)

    cfuns.load_resize_image(paths_c, dataptr, len(c_strings), 160, 320, 4)

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Accepts anything ``np.exp`` accepts (Python scalars, NumPy scalars or
    arrays); arrays are processed element-wise.

    For large negative ``x`` the intermediate ``exp(-x)`` overflows to
    ``inf``. The quotient is then exactly 0.0, which is the correct limit, so
    the floating-point overflow warning is suppressed locally instead of
    being emitted on every such call. Returned values are unchanged.
    """
    with np.errstate(over='ignore'):
        return 1/(1+np.exp(-x))

def get_image_batch():
    """Split ``team.img_list`` into consecutive chunks of ``team.batch_size``.

    Returns:
        list: Slices of ``team.img_list`` in their original order. The last
        slice is shorter when the list length is not a multiple of the batch
        size; an empty image list yields an empty list.
    """
    images = team.img_list
    step = team.batch_size
    return [images[first:first + step] for first in range(0, len(images), step)]

# Pre-compute every batch of image entries and the number of batches; the
# main detection loop iterates over IMAGE_NAMES and uses IMAGE_NAMES_LEN to
# recognise the last batch.
IMAGE_NAMES   = get_image_batch()
IMAGE_NAMES_LEN = len(IMAGE_NAMES)


def compute_bounding_box(bbox_origin, batch, result):
    """Decode one batch of raw network outputs into pixel bounding boxes.

    Each row of ``bbox_origin`` holds 26 integers for one image:

    * entries 0-11: six interleaved (x, y) values; each is shifted right by
      8 bits, divided by 105.0, passed through ``sigmoid`` and averaged;
    * entries 12-23: six interleaved (w, h) values; same shift and divisor,
      passed through ``exp`` and averaged;
    * entries 24 and 25: added as-is to the averaged x and y (presumably the
      grid-cell column/row -- confirm against the HLS code).

    The centre is then scaled by 32 (x) and 36 (y), the size by 40 (w) and
    45 (h). The box corners are rounded to integers and clipped to
    [1, 640] horizontally and [1, 360] vertically.

    Args:
        bbox_origin: Indexable of rows with at least 26 integer entries.
        batch: Sequence of image identifiers belonging to this batch; one
            box is produced per entry.
        result: List extended in place with one ``[x1, x2, y1, y2]`` entry
            per image (note the x-first ordering).

    Side effects:
        Prints each image identifier together with its box.
    """
    # Decode only rows that have a matching image. The final batch may hold
    # fewer images than bbox_origin has rows; a fixed range(4) would append
    # a bogus box and then fail with IndexError on batch[b].
    for b in range(min(len(batch), len(bbox_origin))):
        row = bbox_origin[b]
        xs, ys, ws, hs = 0, 0, 0, 0

        # Centre offsets: interleaved x/y values squashed by the sigmoid.
        for i in range(0, 12, 2):
            xs += sigmoid((row[i] >> 8) / 105.0)
            ys += sigmoid((row[i + 1] >> 8) / 105.0)

        # Box sizes: interleaved w/h values expanded by exp.
        for i in range(12, 24, 2):
            ws += np.exp((row[i] >> 8) / 105.0)
            hs += np.exp((row[i + 1] >> 8) / 105.0)

        # Average the six estimates, add the per-row offsets, then scale.
        xs = (xs / 6.0 + row[24]) * 32
        ys = (ys / 6.0 + row[25]) * 36
        ws = ws / 6.0 * 40
        hs = hs / 6.0 * 45

        # Centre/size -> corners, rounded and clipped to the valid range.
        x1 = np.clip(int(round(xs - ws / 2.0)), 1, 640)
        y1 = np.clip(int(round(ys - hs / 2.0)), 1, 360)
        x2 = np.clip(int(round(xs + ws / 2.0)), 1, 640)
        y2 = np.clip(int(round(ys + hs / 2.0)), 1, 360)

        result.append([x1, x2, y1, y2])
        print(batch[b], [x1, x2, y1, y2])
        

################################## Init FPGA ##################################
# Create the Xlnk memory manager and reset it before allocating buffers.
xlnk = Xlnk()
xlnk.xlnk_reset()

# Buffers allocated through xlnk.cma_array; their physical addresses are
# handed to the HlsNet IP below.
#   img  : 4 x 160 x 320 x 4 uint8 input images (same shape as the host
#          staging buffers used in the main loop)
#   fm   : 73316*16 bytes (presumably scratch space for feature maps --
#          confirm against the HLS design)
#   bbox : 4 x 26 int32 values, copied back and decoded after every run
img   = xlnk.cma_array(shape=[4,160,320,4], dtype=np.uint8)
fm= xlnk.cma_array(shape=(73316*16), dtype=np.uint8)
bbox = xlnk.cma_array(shape=[4,26], dtype=np.int32)
print("Finished allocating memory")


# Write the buffers' physical addresses to registers 0x10, 0x18 and 0x20 of
# the IP (presumably its AXI-Lite pointer arguments -- confirm against the
# generated register map).
HlsNet.write(0x10, img.physical_address)
HlsNet.write(0x18, fm.physical_address)
HlsNet.write(0x20, bbox.physical_address)

# Set up a recorder for the power reading of the "5V" rail; it is used to
# estimate the energy consumed during detection.
rails = pynq.get_rails()
recorder = pynq.DataRecorder(rails["5V"].power)
################################## Main process ##################################
# Host-side state for the loop:
#   bbox_origin : host copy of the accelerator's bbox output
#   result      : accumulated [x1, x2, y1, y2] boxes for all images
#   batch_buff  : the batch processed in the previous iteration
#   image       : host staging buffer for the very first batch
#   image_buff  : host staging buffer preloaded with the next batch
bbox_origin = np.empty([4,26], dtype=np.int32)
result= []
batch_buff  = None
image = np.zeros((4,160,320,4),np.uint8)
image_buff  = np.zeros((4,160,320,4),np.uint8)


print("\n**** Start to detect")
start = time.time()
# Record the 5V rail power (record() is called with 0.05) while the loop runs.
with recorder.record(0.05):
    for i, batch in enumerate(IMAGE_NAMES):
        # Stage this iteration's input in the img buffer. The first batch is
        # loaded here; later batches were already loaded into image_buff
        # during the previous iteration.
        if (i == 0):
            image_paths = team.get_next_batch() 
            load_image(image_paths, image)
            
            np.copyto(img, image)
        else:
            np.copyto(img, image_buff)
          
        # Write 1 to register 0x00 (presumably the start bit of the IP's
        # control register -- confirm against the register map).
        HlsNet.write(0x00, 1)
        # Before waiting on the IP, do the host-side work for the
        # neighbouring batches: preload the next batch (if there is one) ...
        if (0 <= i < IMAGE_NAMES_LEN - 1):
            image_paths = team.get_next_batch()
            load_image(image_paths, image_buff)
               
        # ... and decode the previous batch, whose output was copied into
        # bbox_origin at the end of the previous iteration.
        if (0 < i <= IMAGE_NAMES_LEN - 1):
            compute_bounding_box(bbox_origin, batch_buff, result)

        # Busy-wait for as long as register 0x00 reads back as 1.
        isready = HlsNet.read(0x00)
        while( isready == 1 ):
            isready = HlsNet.read(0x00)
      
        # Copy this batch's output to host memory and remember the batch so
        # the next iteration can decode it.
        np.copyto(bbox_origin, bbox)
        batch_buff = batch

        # There is no next iteration after the last batch, so decode it now.
        if (i == IMAGE_NAMES_LEN - 1):
            compute_bounding_box(bbox_origin, batch, result)
    
    

    
# The end time is taken after the recorder context has exited.
end = time.time()
total_time = end - start
print("**** Detection finished\n")
# Energy estimate: mean of the recorded "5V_power" column times elapsed time.
total_energy = recorder.frame["5V_power"].mean() * total_time
print("Total energy:",str(total_energy),'J')
print('Total time: ',str(total_time) , ' s')


# 4. Recording results, cleanup

In [None]:
# NOTE(review): leftover commented-out call; neither resort_result nor
# result_queue is defined in the visible code.
#result = resort_result(result_queue)
# Hand the boxes, the elapsed time and the energy estimate to the team helper
# for writing the results file.
team.save_results_xml(result, total_time, total_energy)
print("XML results written successfully.")