In [1]:
from pynq import Overlay
from pynq import MMIO
import numpy as np
import time

In [2]:
# Stimulus for the accelerator, flattened to 1-D so it can be sliced into
# fixed-size batches by the driver.
input_fmap = np.load("input.npy").flatten()

# Layer weights, loaded in the same order as the layers they belong to.
conv1_weight, conv2_weight, fc_weight = (
    np.load(npy_path)
    for npy_path in ("layer1_0_weight.npy", "layer2_0_weight.npy", "fc1_weight.npy")
)

# Both convolution kernels packed back-to-back (conv1 first) into one 1-D blob.
conv12_weight = np.concatenate([conv1_weight.flatten(), conv2_weight.flatten()])

# Reference data: `output` is compared element-for-element against the
# hardware result, `answer` against the argmax of each result row.
output, answer = np.load("output.npy"), np.load("label.npy")

In [3]:
class PynqTestDriver:
    """Host-side test driver for the NPU overlay.

    The constructor opens the overlay and keeps the memory-mapped arrays of
    the control/status block and of every BRAM the test touches. `start`
    loads the weights, streams the input through the accelerator batch by
    batch and gathers the outputs.

    Attributes:
        hw: the `Overlay` built from the bitfile.
        csr: control/status array (index 1 is written to launch a batch,
            index 0 is polled until it becomes non-zero).
        ifmap: input feature-map memory (BRAM1).
        conv12_w: memory holding the concatenated conv1+conv2 weights (BRAM6).
        fc_w1 .. fc_w10: one memory per row of `fc_weight` (BRAM7_1..BRAM7_10).
        omem: output memory read back after every batch (BRAM8).
    """

    # Number of array elements moved per transfer. These mirror the values
    # the notebook was validated with; presumably they match the memory
    # layout baked into the bitstream -- confirm before changing.
    CONV12_LEN = 1224    # conv1 + conv2 weights, concatenated
    FC_BANKS = 10        # BRAM7_1 .. BRAM7_10
    FC_BANK_LEN = 2304   # elements per FC weight row
    IFMAP_LEN = 78400    # input elements consumed per batch
    OMEM_LEN = 1000      # output elements produced per batch
    NUM_BATCHES = 100    # batches processed by one call to start()
    NUM_CLASSES = 10     # columns of the returned result matrix

    def __init__(self, bitfile_path):
        """Open the overlay at `bitfile_path` and bind its memory arrays."""
        self.hw = Overlay(bitfile_path)
        self.csr = self.hw.csr_0.mmio.array
        self.ifmap = self.hw.BRAM1.mmio.array
        self.conv12_w = self.hw.BRAM6.mmio.array
        # Bind fc_w1..fc_w10 to BRAM7_1..BRAM7_10; the attribute names are
        # kept so existing code that touches them directly still works.
        for bank in range(1, self.FC_BANKS + 1):
            setattr(self, f"fc_w{bank}", getattr(self.hw, f"BRAM7_{bank}").mmio.array)
        self.omem = self.hw.BRAM8.mmio.array

    def start(self, input_fmap, conv1_weight, conv2_weight, fc_weight):
        """Run every batch through the accelerator and return its outputs.

        Args:
            input_fmap: input data; flattened internally and consumed in
                NUM_BATCHES slices of IFMAP_LEN elements.
            conv1_weight: first convolution layer weights.
            conv2_weight: second convolution layer weights. Together with
                `conv1_weight` it must flatten to CONV12_LEN elements.
            fc_weight: fully-connected weights, indexable as FC_BANKS rows
                of FC_BANK_LEN elements each.

        Returns:
            int8 array of shape (NUM_BATCHES * OMEM_LEN / NUM_CLASSES,
            NUM_CLASSES), i.e. (10000, 10) with the default constants.

        Side effects:
            Prints the elapsed time (weight upload included) in milliseconds.
        """
        result = np.empty(self.NUM_BATCHES * self.OMEM_LEN, dtype=np.int8)
        flat_input = np.ravel(input_fmap)

        # perf_counter is monotonic, so the measurement is immune to
        # wall-clock adjustments while the run is in progress.
        start_time = time.perf_counter()

        # Build the conv weight blob from the arguments actually passed in,
        # instead of relying on a notebook-level global of the same content.
        conv12_weight = np.concatenate((np.ravel(conv1_weight), np.ravel(conv2_weight)))
        self.conv12_w[0:self.CONV12_LEN] = conv12_weight

        for bank in range(self.FC_BANKS):
            getattr(self, f"fc_w{bank + 1}")[0:self.FC_BANK_LEN] = fc_weight[bank]

        for batch in range(self.NUM_BATCHES):
            lo = self.IFMAP_LEN * batch
            self.ifmap[0:self.IFMAP_LEN] = flat_input[lo:lo + self.IFMAP_LEN]
            self.csr[1] = 1
            # Busy-wait until the hardware reports a non-zero status.
            # NOTE(review): there is no timeout; a stalled design hangs here.
            while self.csr[0] == 0:
                pass
            out_lo = self.OMEM_LEN * batch
            # np.array() snapshots the output memory before the next batch
            # is launched.
            result[out_lo:out_lo + self.OMEM_LEN] = np.array(self.omem[0:self.OMEM_LEN])

        runtime = time.perf_counter() - start_time
        print(f"Runtime: {runtime*1000:.3f}ms")
        return result.reshape((-1, self.NUM_CLASSES))
        

In [5]:
# Build the driver; its constructor opens the overlay from this bitfile and
# binds the CSR and BRAM arrays used by the test.
hw = PynqTestDriver(bitfile_path='npu_int8.bit')

In [6]:
# Upload the weights, push every input batch through the accelerator and
# keep the collected outputs; the call also prints the elapsed time.
result = hw.start(
    input_fmap=input_fmap,
    conv1_weight=conv1_weight,
    conv2_weight=conv2_weight,
    fc_weight=fc_weight,
)

Runtime: 4363.858ms


In [8]:
# Score the hardware result two ways:
#   1. exact row-by-row match against the reference output, and
#   2. predicted class (argmax of each row) against the label file.
# The sample count comes from `result` itself so the percentage stays
# correct if the number of processed rows ever changes.
num_samples = len(result)

print("Comparizon with output.npy")
cnt = sum(1 for i in range(num_samples) if np.array_equal(result[i], output[i]))
print(f"Accuracy: {100 * cnt / num_samples:.2f}%")
print("------------")
print("Comparizon with label.npy")
cnt = sum(1 for i in range(num_samples) if np.argmax(result[i]) == answer[i])
print(f"Accuracy: {100 * cnt / num_samples:.2f}%")



Comparizon with output.npy
Accuracy: 100.00%
------------
Comparizon with label.npy
Accuracy: 93.49%


In [9]:
# Dump the result matrix (NumPy abbreviates large arrays with "...") as a
# quick visual sanity check of the values returned by the driver.
print(result)

[[ 0 -2  2 ...  4  3  3]
 [11 -9  1 ...  2 -1  5]
 [-7 -4 -1 ...  2  1 -3]
 ...
 [-1  2  2 ... -4  1 -2]
 [-5 -1  0 ...  4  3  8]
 [ 4  0  0 ...  9  3  5]]
