# Imports

Run this cell first to get imports and build_dir

In [None]:
import onnx
import brevitas.onnx as bo

from brevitas.core.quant import QuantType
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
from finn.util.basic import pynq_part_map
from finn.util.pytorch import ToTensor
from finn.util.visualization import showSrc, showInNetron
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.transformation.general import RemoveUnusedTensors
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.merge_onnx_models import MergeONNXModels
from finn.transformation.streamline import Streamline
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.insert_topk import InsertTopK
from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ

import finn.transformation.streamline.absorb as absorb
import finn.transformation.streamline.reorder as reorder
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls


# Working directory that holds the input model and every intermediate artifact.
build_dir = "/workspace/finn/notebooks/fpga/mlp_v5"

# Input network, followed by the checkpoint written after each stage of the flow.
model_name = build_dir + "/mlp_feat_1111_1002.onnx"
model_tidy = build_dir + "/mlp_tidy.onnx"
model_prepoc_chkpt = build_dir + "/mlp_prepoc.onnx"
model_prepost = build_dir + "/mlp_prepost.onnx"
model_ready_for_hls = build_dir + "/mlp_ready_for_hls.onnx"
model_hls_layers = build_dir + "/mlp_hls_layers.onnx"
model_dataflow_parent = build_dir + "/mlp_dataflow_parent.onnx"
model_partition = build_dir + "/mlp_partition.onnx"
model_set_folding_factors = build_dir + "/mlp_set_folding_factors.onnx"
model_post_synthesis = build_dir + "/mlp_post_synthesis.onnx"
model_pynq_deploy = build_dir + "/mlp_pynq_deploy.onnx"

# Tidy the onnx

In [None]:
# Load the exported network and apply the clean-up passes one after another,
# feeding the result of each pass into the next, then save the tidied model.
model = ModelWrapper(model_name)
for tidy_pass in (
    InferShapes,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
    RemoveStaticGraphInputs,
):
    model = model.transform(tidy_pass())
model.save(model_tidy)

In [None]:
# Open the tidied ONNX file in the Netron viewer for inspection.
showInNetron(model_tidy)

# Preprocessing and postprocessing

Preprocess the input by exporting a single-node ONNX graph for division by 255 (which already exists as `finn.util.pytorch.ToTensor`) and merging it with our original model.
Then, mark the input tensor as 8-bit to let FINN know which level of precision to use.

In [None]:
# Look up the shape of the network's first graph input; the preprocessing
# graph is exported with that same input shape.
model = ModelWrapper(model_tidy)
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
print(ishape)

# Export the ToTensor module as its own ONNX file.
preproc_module = ToTensor()
bo.export_finn_onnx(preproc_module, ishape, model_prepoc_chkpt)

# Merge the exported preprocessing graph with the core model.
pre_model = ModelWrapper(model_prepoc_chkpt)
merge_step = MergeONNXModels(pre_model)
model = model.transform(merge_step)

# The merge may have changed the graph input, so look its name up again
# before annotating it as UINT8.
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType.UINT8)

model.save(model_prepost)

In [None]:
# Open the model with merged preprocessing in the Netron viewer.
showInNetron(model_prepost)

# Streamlining
Use FINN's built-in transformations to streamline the model: floating-point operations are moved around the graph, collapsed into single operations and, in the last step, transformed into multi-thresholding nodes.

In [None]:
model = ModelWrapper(model_prepost)

# Streamlining passes, applied strictly in the order listed.
streamline_passes = [
    # Append a TopK (k=1) node, then run the generic streamlining.
    InsertTopK(k=1),
    MoveScalarLinearPastInvariants(),
    Streamline(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
    # Move the remaining scalar operations past invariants and MatMuls.
    reorder.MoveScalarLinearPastInvariants(),
    reorder.MoveScalarAddPastMatMul(),
    reorder.MoveScalarMulPastMatMul(),
    # Absorb scalar operations into MultiThreshold / TopK nodes.
    absorb.AbsorbAddIntoMultiThreshold(),
    absorb.AbsorbMulIntoMultiThreshold(),
    absorb.AbsorbScalarMulAddIntoTopK(),
    RoundAndClipThresholds(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
]
for streamline_pass in streamline_passes:
    model = model.transform(streamline_pass)

model.save(model_ready_for_hls)

In [None]:
# Open the streamlined model in the Netron viewer.
showInNetron(model_ready_for_hls)

In [None]:
# Convert the streamlined graph to HLS layers by running the conversion
# passes in order, then save the result.
model = ModelWrapper(model_ready_for_hls)
hls_passes = [
    to_hls.InferAddStreamsLayer(),
    to_hls.InferThresholdingLayer(),
    to_hls.InferQuantizedStreamingFCLayer("decoupled"),
    to_hls.InferChannelwiseLinearLayer(),
]
for hls_pass in hls_passes:
    model = model.transform(hls_pass)
model.save(model_hls_layers)

In [None]:
# Open the model with HLS layers in the Netron viewer.
showInNetron(model_hls_layers)

In [None]:
# Apply CreateDataflowPartition to the HLS-layer model and save the result as
# the dataflow parent model.
model = ModelWrapper(model_hls_layers)
partition_pass = CreateDataflowPartition()
parent_model = model.transform(partition_pass)
parent_model.save(model_dataflow_parent)

In [None]:
# Open the dataflow parent model in the Netron viewer.
showInNetron(model_dataflow_parent)

In [None]:
# Wrap the first StreamingDataflowPartition node of the parent model, read the
# file name stored in its "model" attribute, and save a copy of that model
# under the partition checkpoint name.
sdp_node = getCustomOp(
    parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
)
dataflow_model_filename = sdp_node.get_nodeattr("model")
model = ModelWrapper(dataflow_model_filename)
model.save(model_partition)

In [None]:
# Open the extracted dataflow partition model in the Netron viewer.
showInNetron(model_partition)

In [None]:
# Wrap the first node of the currently loaded model in its CustomOp class so
# that its node attributes can be inspected.
# NOTE(review): the names suggest an FC layer, but a later cell configures a
# Thresholding_Batch input quantizer, so node 0 may not be an FC layer — confirm.
fc0 = model.graph.node[0]
fc0w = getCustomOp(fc0)

# Uncomment to print the wrapper class name and list the node attribute types.
#print("CustomOp wrapper is of class " + fc0w.__class__.__name__)
#fc0w.get_nodeattr_types()

In [None]:
fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")

# Folding configuration, one tuple per StreamingFCLayer_Batch node in the order
# returned by get_nodes_by_op_type:
# (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle)
config = [
    (16, 49, 16, 64, "auto"),
    (10, 8, 64, 10, "auto"),
    (8, 8, 64, 64, "auto"),
]

# zip() stops at the shorter sequence: layers without a config entry keep
# their current attributes, and surplus config entries are ignored.
for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
    fcl_inst = getCustomOp(fcl)
    fcl_inst.set_nodeattr("PE", pe)
    fcl_inst.set_nodeattr("SIMD", simd)
    fcl_inst.set_nodeattr("inFIFODepth", ififo)
    fcl_inst.set_nodeattr("outFIFODepth", ofifo)
    fcl_inst.set_nodeattr("ram_style", ramstyle)

# Set the parallelism of the input quantizer to the first layer's SIMD. The
# value is read from config so the two cannot drift apart when config is edited.
first_layer_simd = config[0][1]
inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
inp_qnt = getCustomOp(inp_qnt_node)
inp_qnt.set_nodeattr("PE", first_layer_simd)
model.save(model_set_folding_factors)

In [None]:
# Open the model with folding factors applied in the Netron viewer.
showInNetron(model_set_folding_factors)

# Hardware build

In [None]:
# Target board and clock period for the Zynq build.
pynq_board = "Ultra96"
fpga_part = pynq_part_map[pynq_board]
target_clk_ns = 10
#target_clk_ns = 5
#target_clk_ns = 20

# Build from the checkpoint that carries the folding configuration. Loading
# model_partition here would discard the PE/SIMD/FIFO settings saved to
# model_set_folding_factors.
model = ModelWrapper(model_set_folding_factors)
model = model.transform(ZynqBuild(platform=pynq_board, period_ns=target_clk_ns))
model.save(model_post_synthesis)

In [None]:
# Open the post-synthesis model in the Netron viewer.
showInNetron(model_post_synthesis)

In [None]:
# Reload the post-synthesis model and read the file name stored in the "model"
# attribute of its second node (index 1), then open that file in Netron.
# NOTE(review): presumably node 1 is the partition holding the accelerator
# layers — confirm in the Netron view of the post-synthesis model.
model = ModelWrapper(model_post_synthesis)
sdp_node_middle = getCustomOp(model.graph.node[1])
postsynth_layers = sdp_node_middle.get_nodeattr("model")

showInNetron(postsynth_layers)

In [None]:
# Load the inner model referenced by the post-synthesis graph and display its
# ONNX metadata properties as the cell output.
model = ModelWrapper(postsynth_layers)
model.model.metadata_props

In [None]:
import os
import shutil

# Print the "pynq_driver_dir" metadata property of the currently loaded model.
driver_dir = model.get_metadata_prop("pynq_driver_dir")
print(driver_dir)

# Deployment

We'll now use the `DeployToPYNQ` transformation to create a deployment folder with the bitfile and driver file(s), and copy that to the PYNQ board. Because the Ultra96 cannot be reached over SSH in this setup, there is no need to wait for the whole cell to finish: let it run for a while, then stop it and take the bitfiles.

In [None]:
# Connection settings for the target board and the directory to deploy into.
ip = "192.168.2.99"
port = "22"
username = "xilinx"
password = "xilinx"
target_dir = "/home/xilinx/finn_dev_wkexin"

# NOTE(review): `model` is whatever the preceding cells left loaded. If that is
# the inner model from `postsynth_layers` rather than the post-synthesis model,
# the metadata needed for deployment may be missing — confirm before running.
model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))

# Save under the shared checkpoint name instead of rebuilding the same path.
model.save(model_pynq_deploy)

In [None]:
# Open the saved deployment model in the Netron viewer. Every other
# showInNetron call in this notebook is given a file path, so pass the saved
# file rather than the in-memory ModelWrapper.
showInNetron(model_pynq_deploy)