In [1]:
import netron
from IPython.display import IFrame

def showInNetron(model_filename):
    netron.start(model_filename, address=("0.0.0.0",8081))
    return IFrame(src="http://0.0.0.0:8081/",width="100%",height=400)

In [2]:
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.core.datatype import DataType

import ONNX model and check with Netron

In [2]:
model_file="./save/LeNet_brevitas_W4A4.onnx"

model=ModelWrapper(model_file)
model.set_tensor_datatype(model.graph.input[0].name, DataType.UINT8) # different from FINN v0.7
model.save("./test/LeNet_brevitas_W4A4.onnx")

In [4]:
showInNetron("./test/GR_brevitas_W4A4.onnx")

Serving './test/GR_brevitas_W4A4.onnx' at http://0.0.0.0:8081


tidy-up the model

In [3]:
import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

model = ModelWrapper("./test/LeNet_brevitas_W4A4.onnx")
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(RemoveStaticGraphInputs())
model.save("./test/LeNet_brevitas_W4A4_tidy.onnx")

pre- and post-processing 

In [5]:
from finn.util.pytorch import ToTensor
from finn.transformation.merge_onnx_models import MergeONNXModels
from finn.core.datatype import DataType

model = ModelWrapper("./test/LeNet_brevitas_W4A4_tidy.onnx")
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
chkpt_preproc_name = "./test/LeNet_brevitas_W4A4_preproc.onnx"
bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
model = model.transform(MergeONNXModels(pre_model))
# add input quantization annotation
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType.UINT8)



In [6]:
from finn.transformation.insert_topk import InsertTopK
from finn.transformation.infer_datatypes import InferDataTypes

# postprocessing: insert Top-1 node at the end
model = model.transform(InsertTopK(k=1))
chkpt_name = "./test/LeNet_brevitas_W4A4_post.onnx"
# tidy-up again
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
model.save(chkpt_name)

showInNetron("./test/LeNet_brevitas_W4A4_post.onnx")

Serving './test/LeNet_brevitas_W4A4_post.onnx' at http://0.0.0.0:8081


streamline the model

In [9]:
from finn.transformation.streamline import Streamline
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants, MoveFlattenPastAffine
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import RemoveUnusedTensors

model = ModelWrapper("./test/LeNet_brevitas_W4A4_post.onnx")
model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())
model = model.transform(LowerConvsToMatMul())
model = model.transform(MakeMaxPoolNHWC())

model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())

model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())

model = model.transform(ConvertBipolarMatMulToXnorPopcount())
model = model.transform(Streamline())
# absorb final add-mul nodes into TopK
model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())



model.save("./test/LeNet_brevitas_W4A4_streamlined.onnx")

convert to HLS layers

In [None]:
import os
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.custom_op.registry import getCustomOp
from finn.transformation.infer_data_layouts import InferDataLayouts

output_dir = "./test/output_ipstitch/intermediate_models"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)


# choose the memory mode for the MVTU units, decoupled or const
mem_mode = "const"

model = ModelWrapper("./test/ECG_AF_2D_w4a4_streamlined.onnx")



#'''
model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
# TopK to LabelSelect
model = model.transform(to_hls.InferLabelSelectLayer())
# input quantization (if any) to standalone thresholding


#model = model.transform(to_hls.InferConvInpGen())
model = model.transform(to_hls.InferVVAU())
model = model.transform(to_hls.InferStreamingMaxPool())
model = model.transform(to_hls.InferPool_Batch())

model = model.transform(to_hls.InferThresholdingLayer())
model = model.transform(to_hls.InferConvInpGen())
model = model.transform(to_hls.InferStreamingMaxPool())

model = model.transform(RemoveCNVtoFCFlatten())
# get rid of Tranpose -> Tranpose identity seq
model = model.transform(absorb.AbsorbConsecutiveTransposes())
# infer tensor data layouts
model = model.transform(InferDataLayouts())


#'''
#from finn-examples
model = model.transform(to_hls.InferPool_Batch())
model = model.transform(to_hls.InferConvInpGen())
model = model.transform(to_hls.InferVVAU())
model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
model = model.transform(to_hls.InferChannelwiseLinearLayer())
model = model.transform(to_hls.InferLabelSelectLayer())
model = model.transform(InferShapes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())

#'''

model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
model = model.transform(RemoveUnusedTensors())


# dataflow partitioning

model.save("./test/ECG_AF_2D_w4a4_hls.onnx")
parent_model = model.transform(CreateDataflowPartition())
parent_model.save("./test/output_ipstitch/intermediate_models/dataflow_parent.onnx")
sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(sdp_node)
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save("./test/output_ipstitch/intermediate_models/dataflow_model.onnx")

creat stitched IP core

In [4]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

model_file = "./test/LeNet_brevitas_W2A2_post.onnx"

rtlsim_output_dir = "./test/100_least"

cfg = build.DataflowBuildConfig(
    output_dir          = rtlsim_output_dir,
    #target_fps          = 10000,
    synth_clk_period_ns = 10.0, #50 MHz target frequency
    #fpga_part           = "xc7a50tfgg484-1",
    fpga_part           = "xc7a15tfgg484-1", # only once
    folding_config_file = "./test/w2a2_pe1_test.json", # choose configuration for compiling
    #folding_config_file = "./test/w2a2_multi_pe_test.json",

    default_mem_mode = build_cfg.ComputeEngineMemMode.CONST,
    large_fifo_mem_style = build_cfg.LargeFIFOMemStyle.BRAM,
    
    generate_outputs=[
        build_cfg.DataflowOutputType.STITCHED_IP,
    ]
)

build.build_dataflow_cfg(model_file, cfg)

Building dataflow accelerator from ./test/LeNet_brevitas_W2A2_post.onnx
Intermediate outputs will be generated in /tmp/finn_dev_wenlong
Final outputs will be generated in ./test/100_least
Build log is at ./test/100_least/build_dataflow.log
Running step: step_tidy_up [1/16]
Running step: step_streamline [2/16]
Running step: step_convert_to_hls [3/16]
Running step: step_create_dataflow_partition [4/16]
Running step: step_target_fps_parallelization [5/16]
Running step: step_apply_folding_config [6/16]
Running step: step_generate_estimate_reports [7/16]
Running step: step_hls_codegen [8/16]
Running step: step_hls_ipgen [9/16]
Running step: step_set_fifo_depths [10/16]
Running step: step_create_stitched_ip [11/16]
Running step: step_measure_rtlsim_performance [12/16]
Running step: step_out_of_context_synthesis [13/16]
Running step: step_synthesize_bitfile [14/16]
Running step: step_make_pynq_driver [15/16]
Running step: step_deployment_package [16/16]
Completed successfully


0

check the result of each step

In [13]:
showInNetron("./test/output_ipstitch/intermediate_models/4_step_create_dataflow_partition.onnx")

Stopping http://0.0.0.0:8081
Serving './test/output_ipstitch/intermediate_models/4_step_create_dataflow_partition.onnx' at http://0.0.0.0:8081


In [12]:
showInNetron("./test/output_ipstitch/intermediate_models/dataflow_parent.onnx")

Stopping http://0.0.0.0:8081
Serving './test/output_ipstitch/intermediate_models/dataflow_parent.onnx' at http://0.0.0.0:8081
