In [1]:
import onnx
import brevitas.onnx as bo
import brevitas.nn as qnn
import torch
import torch.nn as nn
import finn.transformation.streamline.absorb as absorb
import finn.transformation.streamline.reorder as reorder
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
import torch.nn.functional as F

from brevitas.core.quant import QuantType
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import pynq_part_map
from finn.util.pytorch import ToTensor
from finn.util.visualization import showSrc, showInNetron
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.transformation.general import RemoveUnusedTensors
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.merge_onnx_models import MergeONNXModels
from finn.transformation.streamline import Streamline
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.insert_topk import InsertTopK
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition

build_dir = "/workspace/finn/notebooks/fpga/mlp(newest)"


### Tidy up the exported onnx

In [3]:
model = ModelWrapper(build_dir + "/mlp.onnx")
model = model.transform(InferShapes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
model.save(build_dir + "/mlp_tidy.onnx")
#showInNetron(build_dir + "/mlp_tidy.onnx")

### Preprocessing
Preprocess the input by exporting it as a single-node ONNX graph and dividing it by 256 into floats between 0 and 1. Then, merge it back to the original model and mark the input tensor as 8-bit for FINN to know the level of precision use. 


In [4]:
model = ModelWrapper(build_dir + "/mlp_tidy.onnx")
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)

# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
chkpt_preproc_name = build_dir + "/mlp_preproc.onnx"
bo.export_finn_onnx(ToTensor(), ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
model = model.transform(MergeONNXModels(pre_model))

# add input quantization annotation: UINT8 for BNN (finn default)
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType.UINT8)

model.save(build_dir + "/mlp_merge.onnx")
#showInNetron(build_dir + "/mlp_merge.onnx")



### Streamlining
Use in-built FINN functions to do streamlining and tidy-up after

In [5]:
model = ModelWrapper(build_dir + "/mlp_merge.onnx")
model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
model = model.transform(reorder.MoveScalarLinearPastInvariants())
model = model.transform(reorder.MoveScalarAddPastMatMul())
model = model.transform(reorder.MoveScalarMulPastMatMul())
model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())

# bit of tidy-up
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())

model.save(build_dir + "/mlp_hls_ready.onnx")
#showInNetron(build_dir + "/mlp_hls_ready.onnx")

In [6]:
model = ModelWrapper(build_dir + "/mlp_hls_ready.onnx")
model = model.transform(to_hls.InferAddStreamsLayer())

model = model.transform(to_hls.InferThresholdingLayer())
model = model.transform(to_hls.InferQuantizedStreamingFCLayer("const"))
model = model.transform(to_hls.InferChannelwiseLinearLayer())
model.save(build_dir + "/mlp_hls_layers.onnx")
#showInNetron(build_dir + "/mlp_hls_layers.onnx")

In [7]:
model = ModelWrapper(build_dir + "/mlp_hls_layers.onnx")

parent_model = model.transform(CreateDataflowPartition())
parent_model.save(build_dir + "/mlp_dataflow_parent.onnx")
#showInNetron(build_dir + "/mlp_dataflow_parent.onnx")

In [8]:
from finn.custom_op.registry import getCustomOp
sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(sdp_node)
dataflow_model_filename = sdp_node.get_nodeattr("model")
showInNetron(dataflow_model_filename)
model = ModelWrapper(dataflow_model_filename)
model.save(build_dir + "/mlp_partition.onnx")
#showInNetron(build_dir + "/mlp_partition.onnx")

IndexError: list index out of range

In [10]:
fc0 = model.graph.node[0]
fc0w = getCustomOp(fc0)

#print("CustomOp wrapper is of class " + fc0w.__class__.__name__)
#fc0w.get_nodeattr_types()

In [11]:
fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
# (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
config = [
    (16, 8, 168, 256, "auto"),    
    (16, 8, 256, 128, "auto"),
    (16, 8, 128, 64, "auto"),
    (3, 8, 64, 3, "auto"),
]

for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
    fcl_inst = getCustomOp(fcl)
    fcl_inst.set_nodeattr("PE", pe)
    fcl_inst.set_nodeattr("SIMD", simd)
    fcl_inst.set_nodeattr("inFIFODepth", ififo)
    fcl_inst.set_nodeattr("outFIFODepth", ofifo)
    fcl_inst.set_nodeattr("ram_style", ramstyle)
    
    
'''
# set parallelism for input quantizer to be same as first layer's SIMD
inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
inp_qnt = getCustomOp(inp_qnt_node)
inp_qnt.set_nodeattr("PE", 49)
'''

'\n# set parallelism for input quantizer to be same as first layer\'s SIMD\ninp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]\ninp_qnt = getCustomOp(inp_qnt_node)\ninp_qnt.set_nodeattr("PE", 49)\n'

In [12]:
model.save(build_dir + "/mlp_set_folding_factors.onnx")
showInNetron(build_dir + "/mlp_set_folding_factors.onnx")


Stopping http://0.0.0.0:8081
Serving '/workspace/finn/notebooks/fpga/mlp_v4/mlp_set_folding_factors.onnx' at http://0.0.0.0:8081


In [None]:
# map to the board
pynq_board = "Ultra96"
fpga_part = pynq_part_map[pynq_board]
target_clk_ns = 10
model = ModelWrapper(build_dir + "/mlp_partition.onnx")
model = model.transform(ZynqBuild(platform = pynq_board, period_ns = target_clk_ns))

In [None]:
# Post synthesis
model.save(build_dir + "/mlp_post_synthesis.onnx")
showInNetron(build_dir + "/mlp_post_synthesis.onnx")

In [None]:
model = ModelWrapper(build_dir + "/mlp_post_synthesis.onnx")
sdp_node_middle = getCustomOp(model.graph.node[1])
postsynth_layers = sdp_node_middle.get_nodeattr("model")

showInNetron(postsynth_layers)

In [None]:
model = ModelWrapper(postsynth_layers)
model.model.metadata_props

In [None]:
! ls {model.get_metadata_prop("vivado_pynq_proj")}

In [None]:
from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
ip = "137.132.86.230"
port = "22"
username = "xilinx"
password = "xilinx"
target_dir = "/home/xilinx/finn_dev_wkexin"
model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
model.save(build_dir + "/mlp_pynq_deploy.onnx")

In [None]:
model.model.metadata_props

In [None]:
! sshpass -p {password} ssh {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'

In [None]:
import numpy as np
from finn.core.onnx_exec import execute_onnx

input_dict = {iname: x.reshape(ishape)}
ret = execute_onnx(model, input_dict) #need to change the input dict

In [None]:
ret[oname]