In [1]:
import netron
from IPython.display import IFrame

def showInNetron(model_filename, port=8081):
    """Serve a model file with Netron and embed the viewer in the notebook.

    Args:
        model_filename: Path of the model file handed to ``netron.start``.
        port: TCP port used both for the Netron server address and for the
            embedded viewer URL. Defaults to 8081, the value that was
            previously hard-coded in both places.

    Returns:
        An ``IPython.display.IFrame`` whose source is the Netron server URL.
    """
    # The same port must be used for the server and the IFrame URL, so it is
    # taken from a single parameter instead of two separate literals.
    netron.start(model_filename, address=("0.0.0.0", port))
    return IFrame(src=f"http://0.0.0.0:{port}/", width="100%", height=400)

In [2]:
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.core.datatype import DataType

In [3]:
import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.util.pytorch import ToTensor
from finn.transformation.merge_onnx_models import MergeONNXModels
from finn.core.datatype import DataType
from finn.transformation.insert_topk import InsertTopK
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.streamline import Streamline
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants, MoveFlattenPastAffine
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import RemoveUnusedTensors

import os
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.custom_op.registry import getCustomOp
from finn.transformation.infer_data_layouts import InferDataLayouts


import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

## compile 1D model

In [8]:
# --- 1D model: paths (modify these if the model is changed) -----------------
model_file = "./multi_model_test/save_model/mnist_1D_3136_s.onnx"
annotated_model_file = "./multi_model_test/test_model/mnist_1D_3136_s.onnx"
# The exported preprocessing graph and the working checkpoint share one file:
# the preprocessing graph is loaded back into `pre_model` before the
# checkpoint is first saved, after which the file holds the merged model.
chkpt_preproc_name = "./multi_model_test/test_temp/mnist_1D.onnx"
chkpt_name = "./multi_model_test/test_temp/mnist_1D.onnx"
output_dir = "./multi_model_test/test_temp/mnist_1D/3136_s"

# choose the memory mode for the MVTU units, decoupled or const
mem_mode = "const"

# Create every directory this cell writes into before the first save, so the
# cell also runs on a fresh checkout. exist_ok=True makes re-runs a no-op.
for required_dir in (
    os.path.dirname(annotated_model_file),
    os.path.dirname(chkpt_name),
    output_dir + "/intermediate_models",
):
    os.makedirs(required_dir, exist_ok=True)

# --- Import the trained model and annotate its input as UINT8 ---------------
model = ModelWrapper(model_file)
model.set_tensor_datatype(model.graph.input[0].name, DataType.UINT8)
model.save(annotated_model_file)

# tidy-up
for transformation in (
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    RemoveStaticGraphInputs(),
):
    model = model.transform(transformation)

# --- Pre- and post-processing -----------------------------------------------
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
model = model.transform(MergeONNXModels(pre_model))
# add input quantization annotation: UINT8
# (the graph input changed after the merge, so its name is looked up again)
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType.UINT8)

# postprocessing: insert Top-1 node at the end
model = model.transform(InsertTopK(k=1))
# tidy-up again
for transformation in (
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
):
    model = model.transform(transformation)
model.save(chkpt_name)

# --- Streamlining and lowering ----------------------------------------------
for transformation in (
    MoveScalarLinearPastInvariants(),
    Streamline(),
    LowerConvsToMatMul(),
    MakeMaxPoolNHWC(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    # The max-pool / transpose pair is applied a second time, exactly as in
    # the original flow. NOTE(review): confirm whether the second pass is
    # still required for this model.
    MakeMaxPoolNHWC(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    Streamline(),
    # absorb final add-mul nodes into TopK
    absorb.AbsorbScalarMulAddIntoTopK(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
):
    model = model.transform(transformation)

model.save(chkpt_name)

# --- HLS conversion ---------------------------------------------------------
# Reload the streamlined model from the checkpoint that was just written.
model = ModelWrapper(chkpt_name)

for transformation in (
    to_hls.InferQuantizedStreamingFCLayer(mem_mode),
    # TopK to LabelSelect
    to_hls.InferLabelSelectLayer(),
    to_hls.InferStreamingMaxPool(),
    to_hls.InferPool_Batch(),
    # input quantization (if any) to standalone thresholding
    to_hls.InferThresholdingLayer(),
    to_hls.InferConvInpGen(),
    to_hls.InferStreamingMaxPool(),
    # get rid of Reshape(-1, 1) operation between hlslib nodes
    RemoveCNVtoFCFlatten(),
    # get rid of Transpose -> Transpose identity seq
    absorb.AbsorbConsecutiveTransposes(),
    # infer tensor data layouts
    InferDataLayouts(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
    RemoveUnusedTensors(),
):
    model = model.transform(transformation)

# This file is what the Netron check and the build cell read afterwards.
model.save(chkpt_name)

# --- Dataflow partitioning --------------------------------------------------
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(output_dir + "/intermediate_models/dataflow_parent.onnx")
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
# Fail with a readable message instead of a bare IndexError on [0].
assert len(sdp_nodes) > 0, (
    "No StreamingDataflowPartition node found in the parent model; "
    "check that the layers were converted to HLS layers."
)
sdp_node = getCustomOp(sdp_nodes[0])
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save(output_dir + "/intermediate_models/dataflow_model.onnx")





Check whether all layers have been converted to HLS layers.

In [5]:
# Display the model file that the 1D conversion cell saves after the HLS
# transformations, so the layer types can be inspected in Netron.
showInNetron("./multi_model_test/test_temp/mnist_1D.onnx")

Serving './multi_model_test/test_temp/mnist_1D.onnx' at http://0.0.0.0:8081


Generate the stitched IP core.

In [9]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

# Input of the build: the 1D model file written by the conversion cell.
model_file = "./multi_model_test/test_temp/mnist_1D.onnx"

# NOTE: `output_dir` is not defined in this cell; it is taken from the kernel
# state left by the conversion cell, so that cell must have been run last.
rtlsim_output_dir = output_dir

# Builder steps, executed in the listed order.
build_steps = [
    "step_create_dataflow_partition",
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_generate_estimate_reports",
    "step_hls_codegen",
    "step_hls_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
]

cfg = build.DataflowBuildConfig(
    output_dir=rtlsim_output_dir,
    # target frequency: a 20 ns clock period corresponds to 50 MHz
    synth_clk_period_ns=20.0,
    # target FPGA part (alternatives tried: "xc7a15tfgg484-1", "xc7a200tfbg484-1")
    fpga_part="xc7a100tfgg484-1",
    default_mem_mode=build_cfg.ComputeEngineMemMode.CONST,
    # large FIFOs are placed in LUTRAM (alternative: build_cfg.LargeFIFOMemStyle.AUTO)
    large_fifo_mem_style=build_cfg.LargeFIFOMemStyle.LUTRAM,
    # auto_fifo_depths is left at its default (alternative tried: False)
    # choose a folding configuration according to requirements
    # (alternative: "./multi_model_test/config/Least_pe_malexnet_1D_auto.json")
    folding_config_file="./multi_model_test/config/Least_pe_malexnet_1D_without_BRAM.json",
    steps=build_steps,
    generate_outputs=[build_cfg.DataflowOutputType.STITCHED_IP],
)

# Kept as the last expression so the builder's return code is displayed.
build.build_dataflow_cfg(model_file, cfg)

Building dataflow accelerator from ./multi_model_test/test_temp/mnist_1D.onnx
Intermediate outputs will be generated in /tmp/finn_dev_wenlong
Final outputs will be generated in ./multi_model_test/test_temp/mnist_1D/3136_s
Build log is at ./multi_model_test/test_temp/mnist_1D/3136_s/build_dataflow.log
Running step: step_create_dataflow_partition [1/8]
Running step: step_target_fps_parallelization [2/8]
Running step: step_apply_folding_config [3/8]
Running step: step_generate_estimate_reports [4/8]
Running step: step_hls_codegen [5/8]
Running step: step_hls_ipgen [6/8]
Running step: step_set_fifo_depths [7/8]
Running step: step_create_stitched_ip [8/8]
Completed successfully


0

## compile 2D model

In [9]:
# --- 2D model: paths (modify these if the model is changed) -----------------
model_file = "./multi_model_test/save_model/model_2D_28_28_s.onnx"
annotated_model_file = "./multi_model_test/test_model/model_2D_28_28_s.onnx"
# The exported preprocessing graph and the working checkpoint share one file:
# the preprocessing graph is loaded back into `pre_model` before the
# checkpoint is first saved, after which the file holds the merged model.
chkpt_preproc_name = "./multi_model_test/test_temp/model_2D.onnx"
chkpt_name = "./multi_model_test/test_temp/model_2D.onnx"
output_dir = "./multi_model_test/test_temp/mnist_2D/28"

# choose the memory mode for the MVTU units, decoupled or const
mem_mode = "const"

# Create every directory this cell writes into before the first save, so the
# cell also runs on a fresh checkout. exist_ok=True makes re-runs a no-op.
for required_dir in (
    os.path.dirname(annotated_model_file),
    os.path.dirname(chkpt_name),
    output_dir + "/intermediate_models",
):
    os.makedirs(required_dir, exist_ok=True)

# --- Import the trained model and annotate its input as UINT8 ---------------
model = ModelWrapper(model_file)
model.set_tensor_datatype(model.graph.input[0].name, DataType.UINT8)
model.save(annotated_model_file)

# tidy-up
for transformation in (
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    RemoveStaticGraphInputs(),
):
    model = model.transform(transformation)

# --- Pre- and post-processing -----------------------------------------------
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
model = model.transform(MergeONNXModels(pre_model))
# add input quantization annotation: UINT8
# (the graph input changed after the merge, so its name is looked up again)
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType.UINT8)

# postprocessing: insert Top-1 node at the end
model = model.transform(InsertTopK(k=1))
# tidy-up again
for transformation in (
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
):
    model = model.transform(transformation)
model.save(chkpt_name)

# --- Streamlining and lowering ----------------------------------------------
for transformation in (
    MoveScalarLinearPastInvariants(),
    Streamline(),
    LowerConvsToMatMul(),
    MakeMaxPoolNHWC(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    # The max-pool / transpose pair is applied a second time, exactly as in
    # the original flow. NOTE(review): confirm whether the second pass is
    # still required for this model.
    MakeMaxPoolNHWC(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    Streamline(),
    # absorb final add-mul nodes into TopK
    absorb.AbsorbScalarMulAddIntoTopK(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
):
    model = model.transform(transformation)

model.save(chkpt_name)

# --- HLS conversion ---------------------------------------------------------
# Reload the streamlined model from the checkpoint that was just written.
model = ModelWrapper(chkpt_name)

for transformation in (
    to_hls.InferQuantizedStreamingFCLayer(mem_mode),
    # TopK to LabelSelect
    to_hls.InferLabelSelectLayer(),
    to_hls.InferStreamingMaxPool(),
    to_hls.InferPool_Batch(),
    # input quantization (if any) to standalone thresholding
    to_hls.InferThresholdingLayer(),
    to_hls.InferConvInpGen(),
    to_hls.InferStreamingMaxPool(),
    # get rid of Reshape(-1, 1) operation between hlslib nodes
    RemoveCNVtoFCFlatten(),
    # get rid of Transpose -> Transpose identity seq
    absorb.AbsorbConsecutiveTransposes(),
    # infer tensor data layouts
    InferDataLayouts(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
    RemoveUnusedTensors(),
):
    model = model.transform(transformation)

# This file is what the Netron check and the build cell read afterwards.
model.save(chkpt_name)

# --- Dataflow partitioning --------------------------------------------------
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(output_dir + "/intermediate_models/dataflow_parent.onnx")
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
# Fail with a readable message instead of a bare IndexError on [0].
assert len(sdp_nodes) > 0, (
    "No StreamingDataflowPartition node found in the parent model; "
    "check that the layers were converted to HLS layers."
)
sdp_node = getCustomOp(sdp_nodes[0])
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save(output_dir + "/intermediate_models/dataflow_model.onnx")




Check whether all layers have been converted to HLS layers.

In [5]:
# Display the model file that the 2D conversion cell saves after the HLS
# transformations, so the layer types can be inspected in Netron.
showInNetron("./multi_model_test/test_temp/model_2D.onnx")

Serving './multi_model_test/test_temp/model_2D.onnx' at http://0.0.0.0:8081


Generate the stitched IP core.

In [10]:
# Input of the build: the 2D model file written by the conversion cell.
model_file = "./multi_model_test/test_temp/model_2D.onnx"

# NOTE: `output_dir` is not defined in this cell; it is taken from the kernel
# state left by the conversion cell, so that cell must have been run last.
rtlsim_output_dir = output_dir

# Builder steps, executed in the listed order.
build_steps = [
    "step_create_dataflow_partition",
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_generate_estimate_reports",
    "step_hls_codegen",
    "step_hls_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
]

cfg = build.DataflowBuildConfig(
    output_dir=rtlsim_output_dir,
    # target frequency: a 20 ns clock period corresponds to 50 MHz
    synth_clk_period_ns=20.0,
    # target FPGA part (alternative tried: "xc7a15tfgg484-1")
    fpga_part="xc7a50tfgg484-1",
    default_mem_mode=build_cfg.ComputeEngineMemMode.CONST,
    # large FIFOs are placed in LUTRAM (alternative: build_cfg.LargeFIFOMemStyle.AUTO)
    large_fifo_mem_style=build_cfg.LargeFIFOMemStyle.LUTRAM,
    # choose a folding configuration according to requirements
    # (alternative: "./multi_model_test/config/Least_pe_malexnet_2D_auto.json")
    folding_config_file="./multi_model_test/config/Least_pe_malexnet_2D_without_BRAM.json",
    steps=build_steps,
    generate_outputs=[build_cfg.DataflowOutputType.STITCHED_IP],
)

# Kept as the last expression so the builder's return code is displayed.
build.build_dataflow_cfg(model_file, cfg)

Building dataflow accelerator from ./multi_model_test/test_temp/model_2D.onnx
Intermediate outputs will be generated in /tmp/finn_dev_wenlong
Final outputs will be generated in ./multi_model_test/test_temp/mnist_2D/28
Build log is at ./multi_model_test/test_temp/mnist_2D/28/build_dataflow.log
Running step: step_create_dataflow_partition [1/8]
Running step: step_target_fps_parallelization [2/8]
Running step: step_apply_folding_config [3/8]
Running step: step_generate_estimate_reports [4/8]
Running step: step_hls_codegen [5/8]
Running step: step_hls_ipgen [6/8]
Running step: step_set_fifo_depths [7/8]
Running step: step_create_stitched_ip [8/8]
Completed successfully


0