# BNN Setup

In [None]:
import bnn
# Print whatever `bnn.available_params` reports for the LFCW1A1 network.
print(bnn.available_params(bnn.NETWORK_LFCW1A1))

In [None]:
# Two classifiers for the same network ("LFCW1A1") and parameter set ("mnist"):
# one created with bnn.RUNTIME_HW and one with bnn.RUNTIME_SW.
hw_classifier = bnn.LfcClassifier(bnn.NETWORK_LFCW1A1,"mnist",bnn.RUNTIME_HW)
sw_classifier = bnn.LfcClassifier(bnn.NETWORK_LFCW1A1,"mnist",bnn.RUNTIME_SW)
# Show the class list exposed by the classifier.
print(hw_classifier.classes)

In [None]:
import cv2
import mnist
import numpy as np

# Load the MNIST test images and their labels through the `mnist` package.
test_data = mnist.test_images()
test_label = mnist.test_labels()

# Bare expression: the notebook shows the shape of the image array as cell output.
test_data.shape

In [None]:
import matplotlib.pyplot as plt
# Preview the test image at index 1 in grayscale, titled with its label.
plt.imshow(test_data[1], cmap='gray')
plt.axis('off')
plt.title('Label: {}'.format(test_label[1]))
plt.show()

## Convert to BNN input format
The image is resized to comply with the MNIST standard: it is resized to 28x28 pixels and the colors are inverted. 

In [None]:
from array import *
from PIL import Image as PIL_Image
from PIL import ImageOps
# Wrap the test image at index 1 in a PIL image object.
img_load = PIL_Image.fromarray(test_data[1,:,:])
# Bare expression: the notebook renders the image as the cell output.
img_load

In [None]:
# Binarise the image: pixels brighter than 127 become 255, all others become 1.
# PIL pixel access is indexed [column, row]; iterating rows in the outer loop
# emits the bytes row by row.
pixel = img_load.load()
data_image = array('B', (
    255 if pixel[col, row] > 127 else 1
    for row in range(28)
    for col in range(28)
))

# Setting up the header of the MNIST format file - Required as the hardware is
# designed for MNIST dataset. The header is four big-endian 32-bit fields.
header = array('B', [
    0, 0, 8, 3,     # magic number 0x00000803 (image data)
    0, 0, 0, 1,     # number of images in the file: 1
    0, 0, 0, 28,    # number of rows
    0, 0, 0, 28,    # number of columns
])
data_image = header + data_image

# The context manager closes the file even if the write raises.
with open('/home/xilinx/mnist_img_proc', 'wb') as output_file:
    data_image.tofile(output_file)

## Launching BNN in hardware

The image is passed to the PL (programmable logic) and the inference is performed there. Use `classify_mnist` to classify a single MNIST-formatted picture.

In [None]:
# Classify the MNIST-formatted file with the classifier created with RUNTIME_HW.
class_out = hw_classifier.classify_mnist("/home/xilinx/mnist_img_proc")
print("Class number: {0}".format(class_out))
print("Class name: {0}".format(hw_classifier.class_name(class_out)))
# Compare the prediction with the label of test image 1.
print("correct" if class_out == test_label[1] else "fail")

## Launching BNN in software
The inference on the same image is performed in software on the ARM core.

In [None]:
# Classify the same MNIST-formatted file with the classifier created with RUNTIME_SW.
class_out = sw_classifier.classify_mnist("/home/xilinx/mnist_img_proc")
print("Class number: {0}".format(class_out))
# Resolve the class name through the same classifier that produced the prediction.
print("Class name: {0}".format(sw_classifier.class_name(class_out)))
# Compare the prediction with the label of test image 1.
print("correct" if class_out == test_label[1] else "fail")

## Do the same in a loop

In [None]:
import sys, os

# Stream that was active when this cell ran; enablePrint() restores it.
current_stdout = sys.stdout
# Open handle on os.devnull while output is suppressed, otherwise None.
_devnull_sink = None


# Disable
def blockPrint():
    """Silence ``print`` by pointing ``sys.stdout`` at ``os.devnull``.

    Calling it again while output is already blocked reuses the open handle
    instead of opening (and leaking) another one.
    """
    global _devnull_sink
    if _devnull_sink is None:
        _devnull_sink = open(os.devnull, 'w')
    sys.stdout = _devnull_sink


# Restore
def enablePrint():
    """Restore ``sys.stdout`` to ``current_stdout`` and close the devnull handle.

    Safe to call when output is not currently blocked.
    """
    global _devnull_sink
    sys.stdout.flush()
    sys.stdout = current_stdout
    if _devnull_sink is not None:
        _devnull_sink.close()
        _devnull_sink = None

def classify_img(index):
    """Classify one test image with ``hw_classifier`` and check the result.

    The image ``test_data[index]`` is binarised, written to
    ``/home/xilinx/mnist_img_proc`` as a single-image MNIST-format file and
    passed to ``hw_classifier.classify_mnist``.

    Args:
        index: Position of the image in ``test_data`` / ``test_label``.

    Returns:
        The result of comparing the predicted class with
        ``test_label[index]`` (truthy when the prediction matches).
    """
    img_load = PIL_Image.fromarray(test_data[index,:,:])

    # Binarise: pixels brighter than 127 become 255, all others become 1.
    # PIL pixel access is indexed [column, row]; iterating rows in the outer
    # loop emits the bytes row by row.
    pixel = img_load.load()
    data_image = array('B', (
        255 if pixel[col, row] > 127 else 1
        for row in range(28)
        for col in range(28)
    ))

    # Setting up the header of the MNIST format file - Required as the hardware
    # is designed for MNIST dataset. Four big-endian 32-bit fields.
    header = array('B', [
        0, 0, 8, 3,     # magic number 0x00000803 (image data)
        0, 0, 0, 1,     # number of images in the file: 1
        0, 0, 0, 28,    # number of rows
        0, 0, 0, 28,    # number of columns
    ])
    data_image = header + data_image

    # The context manager closes the file even if the write raises, so a
    # failure does not leave a handle open across the benchmark loop.
    with open('/home/xilinx/mnist_img_proc', 'wb') as output_file:
        data_image.tofile(output_file)

    class_out = hw_classifier.classify_mnist("/home/xilinx/mnist_img_proc")
    return class_out == test_label[index]
    

from time import time
# Classify every test image while stdout is silenced, timing the whole run.
correct_cnt = 0
blockPrint()
start_time = time()
try:
    for i in range(0, len(test_data)):
        if classify_img(i):
            correct_cnt = correct_cnt + 1
    end_time = time()
finally:
    # Always restore stdout; otherwise an exception inside the loop would
    # leave every later print in the notebook silenced.
    enablePrint()
execution_time = end_time - start_time
total = len(test_data)
print("Overall accuracy: {}".format(correct_cnt/total))
print("  Execution time: {:.4f}s".format(execution_time))
print("      Throughput: {:.4f}FPS".format(total/execution_time))

## Reset the device and the notebook

In [None]:
from pynq import Xlnk

# Create an Xlnk handle and invoke its reset.
# NOTE(review): presumably this releases buffers left allocated by the BNN run - confirm against the pynq Xlnk documentation.
xlnk = Xlnk()
xlnk.xlnk_reset()
# IPython magic that clears the interactive namespace; without `-f` it asks for
# confirmation when run interactively.
%reset

# DPU

In [None]:
from pynq_dpu import DpuOverlay
from time import time
import numpy as np
import mnist
from dnndk import n2cube
import matplotlib.pyplot as plt

# Instantiate the DPU overlay from its bitstream and load the model file into it.
overlay = DpuOverlay("dpu.bit")
overlay.load_model("dpu_mnist_classifier_0.elf")

# Divide the raw pixel values by 255, store them as float32 and append a
# trailing axis of length 1.
raw_data = mnist.test_images()
normalized_data = (raw_data / 255).astype(np.float32)
test_data = np.expand_dims(normalized_data, axis=3)
test_label = mnist.test_labels()

print("Total number of test images: {}".format(len(test_data)))
print("  Dimension of each picture: {}x{}".format(*test_data.shape[1:3]))

In [None]:
# Preview the test image at index 1 (first channel) in grayscale with its label.
plt.imshow(test_data[1, ..., 0], cmap='gray')
plt.axis('off')
plt.title('Label: {}'.format(test_label[1]))
plt.show()

In [None]:
# Names of the kernel and of its input / output nodes, as passed to n2cube below.
# NOTE(review): presumably these must match the names compiled into the loaded .elf model - confirm.
KERNEL_NAME = "mnist_classifier_0"
KERNEL_CONV_INPUT = "conv2d_1_convolution"
KERNEL_FC_OUTPUT = "output_logits_MatMul"

# Open the DPU, load the kernel by name and create a task for it.
n2cube.dpuOpen()
kernel = n2cube.dpuLoadKernel(KERNEL_NAME)
task = n2cube.dpuCreateTask(kernel, 0)
# Query the input tensor size and the output tensor's size, channel count,
# address and scale; the inference cells pass these values to the n2cube calls.
input_len = n2cube.dpuGetInputTensorSize(task, KERNEL_CONV_INPUT)
size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)
conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
outputScale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)

### Trial run

In [None]:
# Classify the first `num_pics` test images and show each with its prediction.
num_pics  = 10
# squeeze=False keeps a 2-D array of axes, so indexing also works for num_pics == 1.
fig, axes = plt.subplots(1, num_pics, figsize=(12,12), squeeze=False)
ax = axes[0]
for i in range(num_pics):
    n2cube.dpuSetInputTensorInHWCFP32(task, KERNEL_CONV_INPUT, 
                                      test_data[i], input_len)
    n2cube.dpuRunTask(task)
    softmax = n2cube.dpuRunSoftmax(conf, channel, size//channel, outputScale)
    prediction = softmax.argmax()

    ax[i].set_title('Prediction: {}'.format(prediction))
    ax[i].axis('off')
    ax[i].imshow(test_data[i,:,:,0], 'gray')

# tight_layout only accounts for artists that exist when it is called, so run
# it after the titles and images have been added.
fig.tight_layout()

### All pictures

In [None]:
# Run every test image through the DPU task, timing the whole loop.
total = test_data.shape[0]
predictions = np.empty_like(test_label)
print("Classifying {} digit pictures ...".format(total))

start = time()
for i, picture in enumerate(test_data):
    n2cube.dpuSetInputTensorInHWCFP32(task, KERNEL_CONV_INPUT,
                                      picture, input_len)
    n2cube.dpuRunTask(task)
    softmax = n2cube.dpuRunSoftmax(conf, channel, size//channel, outputScale)
    # The predicted digit is the index of the largest softmax value.
    predictions[i] = softmax.argmax()
stop = time()

execution_time = stop-start
correct = (predictions == test_label).sum()
print("Overall accuracy: {}".format(correct/total))
print("  Execution time: {:.4f}s".format(execution_time))
print("      Throughput: {:.4f}FPS".format(total/execution_time))

### Clean up

In [None]:
# Release the DPU kernel and the task created from it.
# NOTE(review): the kernel is destroyed before its task here and dpuClose() is
# never called although dpuOpen() was - confirm the intended teardown order
# against the DNNDK documentation.
n2cube.dpuDestroyKernel(kernel)
n2cube.dpuDestroyTask(task)
# IPython magic that clears the interactive namespace; without `-f` it asks for
# confirmation when run interactively.
%reset