In [1]:
import sys, os, time
import numpy as np
np.set_printoptions(precision=3)

from pynq import Xlnk
from pynq import Overlay
import pynq
from multiprocessing import Process, Pipe, Queue, Event, Manager, Value, Lock

# Handle to the PYNQ Xlnk allocator; used further down for cma_array() buffers
# and cma_stats() reporting.
xlnk = Xlnk()
# Reset the allocator before allocating anything. NOTE(review): this presumably
# releases CMA buffers left over from earlier runs (the first cma_stats() output
# below reports zero usage) — confirm that no other process relies on them.
xlnk.xlnk_reset()

In [2]:
# Instantiate the PYNQ overlay from the bitstream file; the path is relative to
# the notebook's working directory.
overlay = Overlay("./0503.bit")
# Interactive help, left disabled: running `overlay?` in IPython shows the
# overlay's docstring.
# overlay?
# Handle to the accelerator IP that the overlay exposes as `FracNet_0`; its
# register_map is read and written by the cells below.
FracNet = overlay.FracNet_0

In [3]:
# Display the accelerator's register map before anything is programmed: the
# control/interrupt registers plus one address register per buffer argument.
FracNet.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  image_thermo_V = Register(image_thermo_V=0),
  conv_weight_3x3_all_new_V = Register(conv_weight_3x3_all_new_V=0),
  conv_weight_1x1_all_new_V = Register(conv_weight_1x1_all_new_V=0),
  weights_all_V = Register(weights_all_V=0),
  DDR_buf_pack_V = Register(DDR_buf_pack_V=0)
}

In [4]:
# Structured dtypes built from unsigned-byte fields: 64 fields (512 bits per
# element) and 32 fields (256 bits per element).
bus512 = ','.join(['B'] * 64)
dt_512 = np.dtype(bus512)

bus256 = ','.join(['B'] * 32)
dt_256 = np.dtype(bus256)

# CMA usage is printed before and after so the footprint of the five buffers
# is visible in the cell output.
print(xlnk.cma_stats())
image_thermo = xlnk.cma_array(shape=3 * 224 * 224, dtype=np.uint32)
conv_weight_3x3_all = xlnk.cma_array(shape=49104, dtype=dt_512)
conv_weight_1x1_all = xlnk.cma_array(shape=6132, dtype=dt_512)
other_weight_all = xlnk.cma_array(shape=5460, dtype=dt_512)
DDR_all = xlnk.cma_array(shape=203400, dtype=dt_512)
print(xlnk.cma_stats())

{'CMA Memory Available': 127721472, 'CMA Memory Usage': 0, 'Buffer Count': 0}
{'CMA Memory Available': 110206976, 'CMA Memory Usage': 17504256, 'Buffer Count': 5}


In [5]:
# Uninitialised float64 host buffers for the fully-connected layer
# (1000 outputs x 1024 inputs, plus one bias per output). Their contents are
# undefined until they are overwritten from the weight files.
fc_weight = np.empty((1000, 1024))
fc_bias = np.empty(1000)

In [6]:
def _read_into(path, dtype, dest):
    """Read the raw binary file `path` as `dtype` and copy it into `dest`.

    The file contents are reshaped to `dest.shape` before the copy, so a file
    with the wrong number of elements raises from `reshape`. Returns the array
    read from disk.
    """
    contents = np.fromfile(path, dtype=dtype)
    np.copyto(dest, contents.reshape(dest.shape))
    return contents


# The input image stays on the host here; it is copied into `image_thermo`
# right before each accelerator run.
image_hw_all_file = np.fromfile("image_hw_all_host_new.bin", dtype=np.uint32)

# Convolution and auxiliary weights go straight into their CMA buffers.
conv_weight_3x3_file = _read_into("conv3x3_weights_host_new.bin", dt_512, conv_weight_3x3_all)

conv_weight_1x1_file = _read_into("conv1x1_weights_host_new.bin", dt_512, conv_weight_1x1_all)

other_weights_file = _read_into("other_weights_host_new.bin", dt_512, other_weight_all)

# Fully-connected parameters are stored as float32 on disk and land in the
# host-side float64 arrays.
fc_weight_file = _read_into("fc_weight_host.bin", np.float32, fc_weight)

fc_bias_file = _read_into("fc_bias_host.bin", np.float32, fc_bias)


In [7]:
# Point each of the accelerator's buffer-address registers at the physical
# address of the matching CMA buffer allocated earlier.
FracNet.register_map.image_thermo_V    = image_thermo.physical_address
FracNet.register_map.conv_weight_3x3_all_new_V   = conv_weight_3x3_all.physical_address
FracNet.register_map.conv_weight_1x1_all_new_V    = conv_weight_1x1_all.physical_address
FracNet.register_map.weights_all_V    = other_weight_all.physical_address
FracNet.register_map.DDR_buf_pack_V    = DDR_all.physical_address
# Display the register map again so the programmed addresses can be checked.
FracNet.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  image_thermo_V = Register(image_thermo_V=2016411648),
  conv_weight_3x3_all_new_V = Register(conv_weight_3x3_all_new_V=2017460224),
  conv_weight_1x1_all_new_V = Register(conv_weight_1x1_all_new_V=2015887360),
  weights_all_V = Register(weights_all_V=2020605952),
  DDR_buf_pack_V = Register(DDR_buf_pack_V=2021654528)
}

In [8]:
def avgpool(conv):
    """Global average pooling: reduce each channel to the mean of its values.

    Parameters
    ----------
    conv : array_like, shape (channels, ...)
        Feature map with the channel axis first. The notebook passes a
        (1024, 7, 7) array, for which the result equals the per-channel sum
        divided by 49.

    Returns
    -------
    numpy.ndarray, shape (channels,), dtype float64
        Mean of every channel over all of its remaining axes.
    """
    conv = np.asarray(conv, dtype=np.float64)
    # Channel count and divisor come from the input instead of the previous
    # hard-coded 1024 and 49, so other feature-map sizes pool correctly too.
    values_per_channel = int(np.prod(conv.shape[1:]))
    flat = conv.reshape(conv.shape[0], values_per_channel)
    return flat.sum(axis=1) / values_per_channel

def matmul(avgpool, weight, bias):
    """Apply the fully-connected layer and pick the highest-scoring class.

    Computes `avgpool @ weight.T + bias` and returns a tuple
    `(index of the largest logit, logits)`.
    """
    scores = avgpool @ weight.T
    scores = scores + bias
    return np.argmax(scores), scores

def _unpack_conv13(ddr, height, width_ceil):
    """Decode the packed 4-bit activations into a (1024, 7, 7) float64 array.

    `ddr` is a 1-D array of 64-byte records. For channel `c`, row `r` and
    column `x` the value is found in

    * record `(c // 32) * height * width_ceil + r * width_ceil + x // 4`
    * byte   `(x % 4) * 16 + (c % 32) // 2`
    * the high nibble when `c` is odd, the low nibble when `c` is even,

    and the 4-bit code `q` is mapped to `q * 2 / 15 - 1`, i.e. into [-1, 1].
    """
    channels = np.arange(1024).reshape(-1, 1, 1)
    rows = np.arange(7).reshape(1, -1, 1)
    cols = np.arange(7).reshape(1, 1, -1)

    record = (channels // 32) * height * width_ceil + rows * width_ceil + cols // 4
    byte = (cols % 4) * 16 + (channels % 32) // 2

    # Reinterpret the 64-field byte records as a plain (n_records, 64) matrix
    # so that all 1024*7*7 values can be gathered with one fancy-index.
    raw = np.asarray(ddr).view(np.uint8).reshape(-1, 64)
    packed = raw[record, byte]
    nibble = np.where(channels % 2 == 1, packed >> 4, packed & 0x0F)
    return nibble.astype(np.float64) * 2 / 15 - 1


def classifier(weight, bias, conv_out, num_img):
    """Consume accelerator outputs from a queue and classify each one.

    For every buffer taken from `conv_out` the packed feature map is decoded,
    average-pooled with `avgpool`, passed through the fully-connected layer
    with `matmul`, and the predicted class index is appended to the
    module-level `predicts` list. The loop ends once `predicts` holds
    `num_img` entries.

    Parameters
    ----------
    weight, bias : numpy.ndarray
        Fully-connected layer parameters handed to `matmul`. These arguments
        are now actually used; previously the globals `fc_weight` / `fc_bias`
        were read and the arguments ignored.
    conv_out : queue-like
        Object whose blocking `get()` returns the accelerator output buffer.
    num_img : int
        Number of predictions to collect before returning.

    Notes
    -----
    Reads the module-level names `predicts`, `img_height` and
    `img_width_ceil`, which must be defined before this function runs.
    """
    # `< num_img` instead of testing for equality after the append: the
    # function returns immediately when nothing (more) is expected rather
    # than blocking forever on the queue.
    while len(predicts) < num_img:
        # get() blocks until an item is available, so no busy-wait on empty()
        # is needed.
        DDR_all_p = conv_out.get()
        conv13_hw = _unpack_conv13(DDR_all_p, img_height, img_width_ceil)
        avgpool_out = avgpool(conv13_hw)
        pred, logits = matmul(avgpool_out, weight, bias)
        predicts.append(pred)

In [9]:
from time import perf_counter

# Read the idle flag once and make sure the start bit is cleared before the
# run loop begins.
idle = FracNet.register_map.CTRL.AP_IDLE
FracNet.register_map.CTRL.AP_START = 0

# Manager-backed objects shared with the classifier process: the list of
# predicted class indices and the queue carrying accelerator output buffers.
mgr = Manager()
predicts = mgr.list()
conv_out_q = mgr.Queue()

# Accumulated accelerator time in seconds (start written -> idle observed).
t = 0
# Layout constants read by `classifier` as globals. NOTE(review):
# img_width_ceil looks like ceil(img_width / 4), since the classifier reads
# four columns per record — confirm. `img_width` is not used in this notebook.
img_height = 7
img_width = 7
img_width_ceil = 2

num_imgs = 1

# The CPU-side classifier runs in its own process so post-processing does not
# sit inside the accelerator polling loop.
p1 = Process(target=classifier, args=(fc_weight, fc_bias, conv_out_q, num_imgs))
p1.start()

try:
    for i in range(num_imgs):
        np.copyto(image_thermo, image_hw_all_file)
        idle = 0
        FracNet.register_map.CTRL.AP_START = 1

        # Busy-poll AP_IDLE until the accelerator reports idle again.
        ts = perf_counter()
        while idle == 0:
            idle = FracNet.register_map.CTRL.AP_IDLE
        tt = perf_counter()
        t += tt - ts
        conv_out_q.put(DDR_all)

    p1.join()
finally:
    # If the loop above raised or was interrupted (e.g. the accelerator never
    # returned to idle), do not leave the classifier process running.
    if p1.is_alive():
        p1.terminate()
        p1.join()
print(predicts)

[0]
