In [1]:
import onnx
import torch
import torch.nn as nn
import brevitas
from torch.nn import BatchNorm2d
from torch.nn import MaxPool2d
from torch.nn import Module
from brevitas.nn import QuantLinear
from brevitas.nn import QuantReLU
from brevitas.nn import QuantIdentity
from brevitas.nn import QuantConv2d
from brevitas.core.quant import QuantType
import torchvision
import torchvision.transforms as transforms

In [2]:
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

In [3]:
train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),
                                          download=True)


train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)


test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

In [4]:
class Lenet_Quant(nn.Module):
    def __init__(self, num_classes):
        super(Lenet_Quant, self).__init__()
        self.layer1 = nn.Sequential(
        QuantConv2d(1, 6, kernel_size=(5,5), bias=False), #stride =1 padding = 0 by default
        BatchNorm2d(6),
        QuantReLU(),
        MaxPool2d(kernel_size =2, stride = 2))
        self.layer2 = nn.Sequential(QuantConv2d(6, 16, kernel_size = (5,5), bias = False),
        BatchNorm2d(16),
        QuantReLU(),
        MaxPool2d(kernel_size=2, stride =2))
        self.layer3 = nn.Sequential(
        QuantLinear(400, 120, bias = False),
        QuantReLU(),
        QuantLinear(120, 84, bias = False),
        QuantReLU(),
        QuantLinear(84, num_classes, bias = False))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.layer3(out)
        return out

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Lenet_Quant(num_classes)
model.load_state_dict(torch.load('qlenet5.pth', map_location=torch.device('cpu')))
model.eval()


Lenet_Quant(
  (layer1): Sequential(
    (0): QuantConv2d(
      1, 6, kernel_size=(5, 5), stride=(1, 1), bias=False
      (input_quant): ActQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
      )
      (output_quant): ActQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
      )
      (weight_quant): WeightQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
        (tensor_quant): RescalingIntQuant(
          (int_quant): IntQuant(
            (float_to_int_impl): RoundSte()
            (tensor_clamp_impl): TensorClampSte()
            (delay_wrapper): DelayWrapper(
              (delay_impl): _NoDelay()
            )
          )
          (scaling_impl): StatsFromParameterScaling(
            (parameter_list_stats): _ParameterListStats(
              (first_tracked_param): _ViewParameterWrapper(
                (view_shape_impl): OverTensorView()
              )
              (stats): _Stats(
                (stats_impl): 

In [6]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

  return super(Tensor, self).rename(names)


Accuracy of the network on the 10000 test images: 99.12 %


In [13]:
import brevitas.onnx as bo

export_onnx_path = "Lenet5_Quant.onnx"
input_shape = torch.rand(1,1,32,32)
bo.export_qonnx(model, input_shape, export_onnx_path)

AttributeError: module 'brevitas.onnx' has no attribute 'export_finn_onnx'

In [9]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.core.modelwrapper import ModelWrapper
o_model = ModelWrapper("onnx_models/Lenet5_Quant.onnx")

In [10]:
showInNetron("onnx_models/Lenet5_Quant.onnx")

Serving 'onnx_models/Lenet5_Quant.onnx' at http://0.0.0.0:8081


In [11]:
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.fold_constants import FoldConstants

o_model = o_model.transform(InferShapes())
o_model = o_model.transform(FoldConstants())
o_model = o_model.transform(GiveUniqueNodeNames())
o_model = o_model.transform(GiveReadableTensorNames())
o_model = o_model.transform(InferDataTypes())
o_model = o_model.transform(RemoveStaticGraphInputs())

o_model.save("onnx_models/lenet5_quant_tidy.onnx")
showInNetron("onnx_models/lenet5_quant_tidy.onnx")

Stopping http://0.0.0.0:8081
Serving 'onnx_models/lenet5_quant_tidy.onnx' at http://0.0.0.0:8081


In [21]:
!pip install --upgrade torchvision

Defaulting to user installation because normal site-packages is not writeable
Collecting torchvision
  Downloading torchvision-0.17.1-cp310-cp310-manylinux1_x86_64.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting torch==2.2.1
  Downloading torch-2.2.1-cp310-cp310-manylinux1_x86_64.whl (755.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m440.9 kB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
[?25hCollecting nvidia-cufft-cu12==11.0.2.54
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType


pmodel = ModelWrapper("onnx_models/lenet5_quant_tidy.onnx")
global_inp_name = pmodel.graph.input[0].name
ishape = pmodel.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
chkpt_preproc_name = "onnx_models/lenet5_quant_tidy_preproc.onnx"
bo.export_qonnx(totensor_pyt, ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
pmodel = pmodel.transform(MergeONNXModels(pre_model))
# add input quantization annotation: UINT8 for all BNN-PYNQ models
global_inp_name = pmodel.graph.input[0].name
pmodel.set_tensor_datatype(global_inp_name, DataType["UINT8"])



from qonnx.transformation.insert_topk import InsertTopK
from qonnx.transformation.infer_datatypes import InferDataTypes

# postprocessing: insert Top-1 node at the end
pmodel = pmodel.transform(InsertTopK(k=1))

# tidy-up again
pmodel = pmodel.transform(InferShapes())
pmodel = pmodel.transform(FoldConstants())
pmodel = pmodel.transform(GiveUniqueNodeNames())
pmodel = pmodel.transform(GiveReadableTensorNames())
pmodel = pmodel.transform(InferDataTypes())
pmodel = pmodel.transform(RemoveStaticGraphInputs())



pmodel.save("onnx_models/lenet5_quant_tidy_pre_post_proc.onnx")
showInNetron("onnx_models/lenet5_quant_tidy_pre_post_proc.onnx")

In [23]:
#new model
from qonnx.util.cleanup import cleanup_model
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
new_model = ModelWrapper("onnx_models/lenet5_quant_tidy_pre_post_proc.onnx")
#necessary transformations
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.general import (
    ApplyConfig,
    GiveReadableTensorNames,
    GiveUniqueNodeNames,
    RemoveStaticGraphInputs,
    RemoveUnusedTensors,
)

print("Step 1: Cleaning up model")
new_model = cleanup_model(new_model)
print("Step 2: Applying transformations")
new_model = new_model.transform(ConvertQONNXtoFINN())
new_model = new_model.transform(InferShapes())
new_model = new_model.transform(FoldConstants())
new_model = new_model.transform(GiveUniqueNodeNames())
new_model = new_model.transform(GiveReadableTensorNames())
new_model = new_model.transform(InferDataTypes())
new_model = new_model.transform(RemoveStaticGraphInputs())
new_model.save("onnx_models/lenet_clean.onnx")

Step 1: Cleaning up model
Step 2: Applying transformations


In [25]:
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
new_model=new_model.transform(ConvertQONNXtoFINN())
new_model.save("onnx_models/lenet_finn_qonnx.onnx")

In [26]:
from finn.transformation.streamline import Streamline
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

new_model = ModelWrapper("onnx_models/lenet_finn_qonnx.onnx")
new_model = new_model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
new_model = new_model.transform(Streamline())
need_lowering = len(new_model.get_nodes_by_op_type("Conv")) > 0
if need_lowering:
    print("Needs lowering")
    new_model = new_model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(MakeMaxPoolNHWC())
    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    new_model = new_model.transform(MakeMaxPoolNHWC())
    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
new_model = new_model.transform(ConvertBipolarMatMulToXnorPopcount())
new_model = new_model.transform(Streamline())
# absorb final add-mul nodes into TopK
new_model = new_model.transform(absorb.AbsorbScalarMulAddIntoTopK())
new_model = new_model.transform(InferDataLayouts())
new_model = new_model.transform(RemoveUnusedTensors())

new_model.save("onnx_models/lenet5_quant_streamlined.onnx")


Needs lowering


In [2]:
from finn.util.visualization import showSrc, showInNetron
showInNetron("onnx_models/lenet5_quant_streamlined.onnx")

OSError: [Errno 98] Address already in use

In [34]:
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts

# choose the memory mode for the MVTU units, decoupled or const
mem_mode = "decoupled"

new_model = ModelWrapper("onnx_models/lenet5_quant_streamlined.onnx")
new_model = new_model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
new_model = new_model.transform(Streamline())
need_lowering = len(new_model.get_nodes_by_op_type("Conv")) > 0
if need_lowering:
    print("Needs lowering")
    new_model = new_model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(MakeMaxPoolNHWC())
    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    new_model = new_model.transform(MakeMaxPoolNHWC())
    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
new_model = new_model.transform(ConvertBipolarMatMulToXnorPopcount())
new_model = new_model.transform(Streamline())
# absorb final add-mul nodes into TopK
new_model = new_model.transform(absorb.AbsorbScalarMulAddIntoTopK())
new_model = new_model.transform(InferDataLayouts())
new_model = new_model.transform(RemoveUnusedTensors())

new_model.save("onnx_models/lenet5_quant_dataflow_parent.onnx")
showInNetron("onnx_models/lenet5_quant_dataflow_parent.onnx")



Stopping http://0.0.0.0:8081
Serving 'onnx_models/lenet5_quant_dataflow_parent.onnx' at http://0.0.0.0:8081


In [1]:
showInNetron("onnx_models/lenet5_quant_dataflow_parent.onnx")

NameError: name 'showInNetron' is not defined

In [35]:
parent_model = new_model.transform(CreateDataflowPartition())

sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(sdp_node)
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save("onnx_models/lenet5_quant_dataflow.onnx")

IndexError: list index out of range

In [None]:
showInNetron("onnx_models/lenet5_quant_dataflow.onnx")

In [None]:
model = ModelWrapper("onnx_models/lenet5_quant_dataflow.onnx")
fc0 = model.graph.node[1]
fcw = getCustomOp(fc0)
print("CustomOp wrapper is of class " + fcw.__class__.__name__)
fcw.get_nodeattr_types()

In [None]:
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts

model = ModelWrapper("onnx_models/lenet5_quant_dataflow.onnx")
fc_layers = model.get_nodes_by_op_type("MatrixVectorActivation")
# each tuple is (PE, SIMD, in_fifo_depth) for a layer
folding = [
    (1, 1, [64], [64]),
    (1, 6, [64], [64]),
    (1, 4, [64], [64]),
    (1, 4, [64], [64]),
    (1, 4, [64], [64]),
]
for fcl, (pe, simd, ififodepth, ofifodepth) in zip(fc_layers, folding):
    fcl_inst = getCustomOp(fcl)
    fcl_inst.set_nodeattr("PE", pe)
    fcl_inst.set_nodeattr("SIMD", simd)
    fcl_inst.set_nodeattr("inFIFODepths", ififodepth)
    fcl_inst.set_nodeattr("outFIFODepths", ofifodepth)
# use same SIMD values for the sliding window operators
swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
for i in range(len(swg_layers)):
    swg_inst = getCustomOp(swg_layers[i])
    simd = folding[i][1]
    swg_inst.set_nodeattr("SIMD", simd)

model = model.transform(GiveUniqueNodeNames())
model = model.save("onnx_models/lenet5_folded.onnx")