# Make Verilog Testbench

This notebook is based on the flow explained in [Finn Verilog](https://github.com/Xilinx/finn/blob/main/tutorials/fpga_flow/README.md). It generates single images or batches of images to test the model and takes care of the dimensions of the .dat files.

In [None]:
import numpy as np
import os

from qonnx.core.modelwrapper import ModelWrapper

from qonnx.custom_op.registry import getCustomOp

import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import finn.util.data_packing as dpk
from qonnx.core.datatype import DataType

In [None]:
from pathlib import Path
import cv2
import my_metrics as metrics
import matplotlib.pyplot as plt

# Load Image and Label

In [None]:
def get_label(label_file):
    '''
    Receives a txt file and returns the label associated, as [smoke?, fire?]

    Every non-blank line must hold exactly 5 whitespace-separated fields, the
    first one being the integer class id used as index into the label
    (0 or 1). The other 4 fields are ignored. Blank lines are skipped.

    Returns a float array of shape (1, 2) where position `class_id` is 1.
    if at least one line of the file carries that class id, 0. otherwise.

    Raises ValueError if a non-blank line does not have exactly 5 fields.
    '''
    label_array = np.zeros((2))

    with open(label_file) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line (typically a trailing newline at the end of the
                # file): there is nothing to unpack, so skip it
                continue
            class_id, _, _, _, _ = fields
            label_array[int(class_id)] = 1.
            if label_array.all():
                # Both classes already found: remaining lines cannot change the label
                break

    label_array = np.expand_dims(label_array, axis=0)

    return label_array

In [None]:
def load_image_and_label(img_file, labels_dir):
    '''
    Receives image and label files and returns the image ready for FINN and corresponding label

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224 (bilinear interpolation), cast to float32 and given a leading
    batch axis, so its shape is (1, 224, 224, channels).
    The label is read by get_label from the file in `labels_dir` named after
    the image stem plus '.txt'.

    Raises FileNotFoundError if the image cannot be read.
    '''
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread does not raise on a missing/unreadable file, it returns None;
        # fail here with a clear message instead of inside cvtColor
        raise FileNotFoundError(f'Could not read image file: {img_file}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224), interpolation = cv2.INTER_LINEAR)
    img = img.astype(np.float32)
    img = np.expand_dims(img, axis=0)

    img_name = Path(img_file).stem
    # Path join works whether or not labels_dir ends with a path separator
    label_file = Path(labels_dir) / (img_name + '.txt')
    label = get_label(label_file)

    return img, label

### Images and Labels Folders

In [None]:
# Dataset root (dfire_mini); only its test split is used in this notebook
DS_FOLDER = '../../datasets/dfire_mini/' + 'test/'

# Sub-folders of the split holding the images and their label files
DS_IMGS = f'{DS_FOLDER}images/'
DS_LABELS = f'{DS_FOLDER}labels/'

In [None]:
# Paths (images folder + entry name) of everything found in DS_IMGS, in sorted order.
# Sorting the names before prefixing gives the same order as sorting the full paths,
# because all of them share the same prefix.
IMGS_LIST = [DS_IMGS + img_name for img_name in sorted(os.listdir(DS_IMGS))]

In [None]:
# Show one entry of the list to check how the image paths look
print(IMGS_LIST[10])

### Load Test Image and Label

IMG_BATCH controls whether the model is tested with only 1 image or with a batch of images. If a batch is chosen, images must be concatenated.

In [None]:
# True: test the model with the whole batch in test_img_list
# False: test the model with the single image test_img
IMG_BATCH = True

### TEST
test_img_list = [IMGS_LIST[2], IMGS_LIST[9], IMGS_LIST[12], IMGS_LIST[14],
                 IMGS_LIST[18], IMGS_LIST[19], IMGS_LIST[24], IMGS_LIST[27]]
# Image used in single-image mode. It has to be defined here, otherwise
# setting IMG_BATCH = False fails with a NameError.
test_img = test_img_list[0]

selected_imgs = test_img_list if IMG_BATCH else [test_img]

# Each loaded image/label already has a leading batch axis of size 1, so a
# single concatenation along axis 0 stacks them into the batch
loaded = [load_image_and_label(img_file=img_file, labels_dir=DS_LABELS)
          for img_file in selected_imgs]
npy_img = np.concatenate([img for img, _ in loaded])
npy_label = np.concatenate([label for _, label in loaded])

In [None]:
# Shape and data type of the image batch
print(f'Image shape = {npy_img.shape}')
print(f'Image data type = {npy_img.dtype}')

# Shape and data type of the label batch (dtype taken from the labels, not from the images)
print(f'Label shape = {npy_label.shape}')
print(f'Label data type = {npy_label.dtype}')

### Plot Image

In [None]:
# Index of the image to show; clamped to the last image so that the cell
# also works when only one image was loaded (IMG_BATCH = False)
plot_idx = min(3, npy_img.shape[0] - 1)

fig, ax = plt.subplots(figsize=(3,3))

# Pixel values are floats in the 0-255 range, imshow expects floats in [0, 1]
ax.imshow(npy_img[plot_idx]/255.)
ax.set_title(f'Image {plot_idx} - label {npy_label[plot_idx]}')
plt.show()

### Template and Build folders

Use templates provided by Xilinx:

https://github.com/Xilinx/finn/tree/main/tutorials/fpga_flow/templates

#### 750 FPS

In [None]:
# Folder where the files generated by this notebook are written
verilog_stitched_folder = './verilog_stitched_sim/750_FPS'
# Folder holding the testbench template
template_folder = './verilog_stitched_sim/templates'
# Output folder of the build whose stitched IP model is loaded later on
build_folder = '/'.join([
    './experiments',
    '750_FPS',
    '02_full_build_vvau_lut_hls_mvau_hls',
    'output_full_build',
])
sim_output_dir = f'{build_folder}/sim'

# Create the sim folder (and any missing parent) the first time only
if not os.path.isdir(sim_output_dir):
    print(f'Creating {sim_output_dir}')
    os.makedirs(sim_output_dir)

#### 30 FPS

In [None]:
# verilog_stitched_folder = './verilog_stitched_sim/30_FPS'
# template_folder = './verilog_stitched_sim/templates'
# build_folder = ('./experiments_CLK' 
#                + '/30_FPS_CLK_200ns'
#                + '/03_full_build_final_mvau_hls'
#                + '/output_full_build')

#### 5 FPS

In [None]:
# verilog_stitched_folder = './verilog_stitched_sim/05_FPS'
# template_folder = './verilog_stitched_sim/templates'
# build_folder = ('./experiments_CLK' 
#                + '/1_FPS_CLK_1us'
#                + '/04_full_build_final_mvau_hls'
#                + '/output_full_build')

### Filenames

In [None]:
# Base names carry a leading '/' because they are appended to a folder
# path by plain string concatenation
inp_fname = '/input'
expected_oup_fname = '/expected_output'

# Numpy versions of the files
inp_npy_fname = f'{inp_fname}.npy'
expected_oup_npy_fname = f'{expected_oup_fname}.npy'

# .dat versions of the files
inp_dat_fname = f'{inp_fname}.dat'
expected_oup_dat_fname = f'{expected_oup_fname}.dat'

In [None]:
# Show each pair of file names, input first and expected output on the next line
print(inp_npy_fname, expected_oup_npy_fname, sep=" \n ")
print(inp_dat_fname, expected_oup_dat_fname, sep=" \n ")

### Save Numpy files

In [None]:
# Save the image batch and its labels as .npy files in the stitched folder.
# The file names already start with '/', so they are appended to the folder
# by plain concatenation.
np.save(verilog_stitched_folder + inp_npy_fname, npy_img)
np.save(verilog_stitched_folder + expected_oup_npy_fname, npy_label)

### Load Numpy files to check

In [None]:
# Read back the two files just written, to check their content in the next cells
inp_check = np.load(verilog_stitched_folder + inp_npy_fname)
oup_check = np.load(verilog_stitched_folder + expected_oup_npy_fname)

In [None]:
# Shape and data type of the input read back from disk
print(f'Input shape: {inp_check.shape}')
print(f'Input type: {inp_check.dtype}')

# Same information for the expected output, labelled like the input lines
print(f'Expected output shape: {oup_check.shape}')
print(f'Expected output type: {oup_check.dtype}')

In [None]:
# Plot the first image read back from disk as a visual check
fig, ax = plt.subplots(figsize=(3,3))

# Values are divided by 255 so imshow receives floats in [0, 1]
ax.imshow(inp_check[0]/255.)
plt.show()

# Step by Step generation of files

### Image data to input.dat

In [None]:
# Model and Stitched folder
model_filename = build_folder + "/intermediate_models/step_create_stitched_ip.onnx"
model = ModelWrapper(model_filename)

# load the provided input data
inp_data = np.load(verilog_stitched_folder + inp_npy_fname)
batchsize = inp_data.shape[0]
print(f'Batch size: {batchsize}')

In [None]:
# query the parallelism-dependent folded input shape from the
# node consuming the graph input
inp_name = model.graph.input[0].name
inp_node = getCustomOp(model.find_consumer(inp_name))
inp_shape_folded = list(inp_node.get_folded_input_shape())

print(f'Input shape folded: {inp_shape_folded}')

inp_stream_width = inp_node.get_instream_width_padded()

print(f'Input stream width: {inp_stream_width}')

In [None]:
# # fix first dimension (N: batch size) to correspond to input data
# # since FINN model itself always uses N=1
inp_shape_folded[0] = batchsize
inp_shape_folded = tuple(inp_shape_folded)
inp_dtype = model.get_tensor_datatype(inp_name)
print(f'Input data type: {inp_dtype}')
# now re-shape input data into the folded shape and do hex packing
inp_data = inp_data.reshape(inp_shape_folded)

print(f'Input data after folded reshape: {inp_data.shape}')

In [None]:
# Pack the innermost dimension of the folded input into hex strings without prefix.
# NOTE(review): presumably each string is one stream word of inp_stream_width bits and
# reverse_inner=True reverses the element order inside the word - confirm in FINN data_packing docs
inp_data_packed = dpk.pack_innermost_dim_as_hex_string(
    inp_data, inp_dtype, inp_stream_width, prefix="", reverse_inner=True
)

print(f'Input data packed: {inp_data_packed.shape}')
# print(inp_data_packed)

In [None]:
# np.savetxt cannot save arrays of more than 2D, so data must be reshaped.
# Flattening (C order) works for a single image and for a batch, whatever the
# folded shape is, and lets the cell be re-run. With "\n" as delimiter, a 2D
# array would also be written one value per line in the same order, so the
# resulting file keeps the one-hex-word-per-line layout.
inp_data_packed = inp_data_packed.reshape(-1)
print(f'Input data packed after flatten: {inp_data_packed.shape}')

np.savetxt(verilog_stitched_folder + inp_dat_fname, inp_data_packed, fmt="%s", delimiter="\n")

### Label to out.dat

In [None]:
# load expected output and calculate folded shape
exp_out = np.load(verilog_stitched_folder + expected_oup_npy_fname)    
out_name = model.graph.output[0].name
out_node = getCustomOp(model.find_producer(out_name))
out_shape_folded = list(out_node.get_folded_output_shape())
out_stream_width = out_node.get_outstream_width_padded()
print(f'Out stream width: {out_stream_width}')
out_shape_folded[0] = batchsize
out_shape_folded = tuple(out_shape_folded)
print(f'Out shape folded: {out_shape_folded}')
out_dtype = model.get_tensor_datatype(out_name)
print(f'Out data type: {out_dtype}')
exp_out = exp_out.reshape(out_shape_folded)
out_data_packed = dpk.pack_innermost_dim_as_hex_string(
    exp_out, out_dtype, out_stream_width, prefix="", reverse_inner=True
)
np.savetxt(
    verilog_stitched_folder + expected_oup_dat_fname,
    out_data_packed,
    fmt="%s",
    delimiter="\n",
)

### Testbench file

As the model output is Bipolar, some minor changes must be applied to the ```finn_testbench.sv``` file generated:

- Wherever `dout_tdata` is used, it must be changed to `dout_tdata[0]`.
- Likewise, `exp_output_data[rd_ptr]` must be changed to `exp_output_data[rd_ptr][0]`.

Below the code with the changes:
```
if (dout_tdata[0] !== exp_output_data[rd_ptr][0]) begin
  $display("ERR: Data mismatch %h != %h ",dout_tdata[0], exp_output_data[rd_ptr][0]);
  err_count++;
end else begin
  $display("CHK: Data    match %h == %h   --> %0d",dout_tdata[0], exp_output_data[rd_ptr][0], data_count);
```

It helps to add a simple constraint file to the Vivado project, so that there is a clock in the design and power estimation can be performed with SAIF files for switching activity. Below is an example for a 5 MHz clock:
```
create_clock -name ap_clk -period 200.000 [get_ports ap_clk]
```

If the constraint file is added, ```finn_testbench.sv``` clock must be changed accordingly:
```
always #100ns ap_clk = !ap_clk; //30FPS -> clock is 5MHz
```

In [None]:
# fill in testbench template
with open(template_folder + "/finn_testbench.template.sv", "r") as f:
    testbench_sv = f.read()
testbench_sv = testbench_sv.replace("@N_SAMPLES@", str(batchsize))
testbench_sv = testbench_sv.replace("@IN_STREAM_BITWIDTH@", str(inp_stream_width))
testbench_sv = testbench_sv.replace("@OUT_STREAM_BITWIDTH@", str(out_stream_width))
testbench_sv = testbench_sv.replace(
    "@IN_BEATS_PER_SAMPLE@", str(int(np.prod(inp_shape_folded[:-1]) / batchsize))
)
testbench_sv = testbench_sv.replace(
    "@OUT_BEATS_PER_SAMPLE@", str(int(np.prod(out_shape_folded[:-1]) / batchsize))
)

time_out = 1600000 # Cycles to wait after last image of the batch is loaded in the accelerator
if not IMG_BATCH:
    time_out_str = str(time_out) 
    testbench_sv = testbench_sv.replace("@TIMEOUT_CYCLES@", time_out_str) 
else:
    time_out_str = str(batchsize * time_out) 
    testbench_sv = testbench_sv.replace("@TIMEOUT_CYCLES@", time_out_str) 
    
if not IMG_BATCH:
    with open(verilog_stitched_folder + "/finn_testbench.sv", "w") as f:
        f.write(testbench_sv)
else:
    with open(verilog_stitched_folder + "/finn_testbench_batch.sv", "w") as f:
        f.write(testbench_sv)