In [1]:
from brevitas.quant.base import UintQuant, MaxStatsScaling
from brevitas.quant.solver.weight import WeightQuantSolver
from brevitas.quant.solver.bias import BiasQuantSolver
from brevitas.quant.solver.act import ActQuantSolver
from brevitas.core.function_wrapper.ops_ste import CeilSte
from brevitas.inject.enum import RestrictValueType

class Uint16Bias(UintQuant, MaxStatsScaling, BiasQuantSolver):
    scaling_per_output_channel = False
    restrict_scaling_type = RestrictValueType.POWER_OF_TWO
    bit_width = 16
    restrict_value_float_to_int_impl = CeilSte
    requires_input_bit_width = False
    requires_input_scale = True
    

class Uint2Weight(UintQuant, MaxStatsScaling, WeightQuantSolver):
    scaling_per_output_channel = False
    restrict_scaling_type = RestrictValueType.POWER_OF_TWO
    bit_width = 2
    restrict_value_float_to_int_impl = CeilSte
    requires_input_bit_width = False
    requires_input_scale = True

class Uint2Act(UintQuant, MaxStatsScaling, ActQuantSolver):
    scaling_per_output_channel = False
    restrict_scaling_type = RestrictValueType.POWER_OF_TWO
    bit_width = 2
    restrict_value_float_to_int_impl = CeilSte
    requires_input_bit_with = False
    requires_input_scale = True

In [2]:
# Some standard imports
import io
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from brevitas.nn import QuantLinear, QuantReLU, QuantSigmoid

In [3]:
"""
class Net(nn.Module):

    def __init__(self, input_dim, output_dim):
        
#        input_dim (int): size of the input features
#        output_dim (int): size of the output
        
        super(Net, self).__init__()
        self.fc1 = QuantLinear(input_dim, 2, bias=True,weight_bit_width=2)
        self.fc2 = QuantLinear(2, output_dim, bias=True,weight_bit_width=2)

    def forward(self, x): # there are different ways of implementing this
        x = self.fc1(x)
        x = QuantSigmoid(x, act_quant=Uint2Act)
        x = self.fc2(x)
        return [x]
"""
    

'\nclass Net(nn.Module):\n\n    def __init__(self, input_dim, output_dim):\n        \n#        input_dim (int): size of the input features\n#        output_dim (int): size of the output\n        \n        super(Net, self).__init__()\n        self.fc1 = QuantLinear(input_dim, 2, bias=True,weight_bit_width=2)\n        self.fc2 = QuantLinear(2, output_dim, bias=True,weight_bit_width=2)\n\n    def forward(self, x): # there are different ways of implementing this\n        x = self.fc1(x)\n        x = QuantSigmoid(x, act_quant=Uint2Act)\n        x = self.fc2(x)\n        return [x]\n'

In [4]:
    input_dim=2
    output_dim=1

    model = torch.nn.Sequential(
            QuantLinear(input_dim, 2, bias=True,weight_bit_width=2),
            QuantSigmoid(act_quant=Uint2Act),
            QuantLinear(2, output_dim, bias=True,weight_bit_width=2))

In [5]:
def main():
    import numpy as np
    # create instance of Net
#    model = Net(2,1)

    #model.state_dict()

    input_dim=2
    output_dim=1

    model = torch.nn.Sequential(
            QuantLinear(input_dim, 2, bias=True,weight_bit_width=2),
            QuantSigmoid(act_quant=Uint2Act),
            QuantLinear(2, output_dim, bias=True,weight_bit_width=2))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    x = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], device=device).float()
    y = torch.tensor([[0], [1], [1], [0]], device=device).view(4,1).float()

    # convert numpy array to tensor
    x_tensor = torch.clone(x)
    y_tensor = torch.clone(y)

    # set training mode
    model.train() 
    # In PyTorch, models have a train() method which, somewhat disappointingly, 
    # does NOT perform a training step. Its only purpose is to set the model to training mode. 
    # Why is this important? Some models may use mechanisms like Dropout, for instance, 
    # which have distinct behaviors during training and evaluation phases.


    # set training parameters
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    criterion = torch.nn.MSELoss() # defines a MSE Loss function

    # defines number of epochs
    num_epochs = 50001
    # start to train
    epoch_loss = []
    for epoch in range(num_epochs):
        # forward
        outputs = model(x_tensor)[0]

        # calculate loss
        loss = criterion(outputs, y_tensor)

        # update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # save loss of this epoch
        epoch_loss.append(loss.data.numpy().tolist()) 
        
        if epoch % 1000 == 0:
            print("Epoch: {0}, Loss: {1}, ".format(epoch, loss.to("cpu").detach().numpy()))
    
    print(model.state_dict())

     

In [6]:
if __name__ == "__main__": 

    # Let's build our model 
    main()
    
    # loss is stuck at 0.25 after the first 1000 epoch

  return torch._C._cuda_getDeviceCount() > 0
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0, Loss: 0.5281404256820679, 
Epoch: 1000, Loss: 0.25, 
Epoch: 2000, Loss: 0.25, 
Epoch: 3000, Loss: 0.25, 
Epoch: 4000, Loss: 0.25, 
Epoch: 5000, Loss: 0.25, 
Epoch: 6000, Loss: 0.25, 
Epoch: 7000, Loss: 0.25, 
Epoch: 8000, Loss: 0.25, 
Epoch: 9000, Loss: 0.25, 
Epoch: 10000, Loss: 0.25, 
Epoch: 11000, Loss: 0.25, 
Epoch: 12000, Loss: 0.25, 
Epoch: 13000, Loss: 0.25, 
Epoch: 14000, Loss: 0.25, 
Epoch: 15000, Loss: 0.25, 
Epoch: 16000, Loss: 0.25, 
Epoch: 17000, Loss: 0.25, 
Epoch: 18000, Loss: 0.25, 
Epoch: 19000, Loss: 0.25, 
Epoch: 20000, Loss: 0.25, 
Epoch: 21000, Loss: 0.25, 
Epoch: 22000, Loss: 0.25, 
Epoch: 23000, Loss: 0.25, 
Epoch: 24000, Loss: 0.25, 
Epoch: 25000, Loss: 0.25, 
Epoch: 26000, Loss: 0.25, 
Epoch: 27000, Loss: 0.25, 
Epoch: 28000, Loss: 0.25, 
Epoch: 29000, Loss: 0.25, 
Epoch: 30000, Loss: 0.25, 
Epoch: 31000, Loss: 0.25, 
Epoch: 32000, Loss: 0.25, 
Epoch: 33000, Loss: 0.25, 
Epoch: 34000, Loss: 0.25, 
Epoch: 35000, Loss: 0.25, 
Epoch: 36000, Loss: 0.25, 


In [7]:
import brevitas.onnx as bo
from brevitas.quant_tensor import QuantTensor
"""
# create a QuantTensor instance to mark input as bipolar during export
input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
input_a = 2 * input_a - 1
scale = 1.0
input_t = torch.from_numpy(input_a * scale)
input_qt = QuantTensor(
    input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
)

bo.export_finn_onnx(
    model_for_export, export_path=ready_model_filename, input_t=input_qt
)
"""

'\n# create a QuantTensor instance to mark input as bipolar during export\ninput_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)\ninput_a = 2 * input_a - 1\nscale = 1.0\ninput_t = torch.from_numpy(input_a * scale)\ninput_qt = QuantTensor(\n    input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True\n)\n\nbo.export_finn_onnx(\n    model_for_export, export_path=ready_model_filename, input_t=input_qt\n)\n'

In [8]:
# PYTORCH FINN-ONNX EXPORT
import torch.onnx 
# set the model to inference mode 
#model = Net(2,1)
#model.state_dict()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
x = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], device=device).float()
y = torch.tensor([[0], [1], [1], [0]], device=device).view(4,1).float()
model.eval() 

# Let's create a dummy input tensor  
dummy_input = (x) # no test input
#dummy_input = QuantTensor(dummy_input,scale=1, bit_width=torch.tensor(1.0), signed=True)

# Export the model   

torch.onnx.export(model,         # model being run 
                  dummy_input,       # model input (or a tuple for multiple inputs) 
                 "xor_network.onnx",       # where to save the model  
                 export_params=True,  # store the trained parameter weights inside the model file 
                 opset_version=11,    # the ONNX version to export the model to 
                 do_constant_folding=True,  # whether to execute constant folding for optimization 
                 input_names = ['modelInput'],   # the model's input names 
                 output_names = ['modelOutput'], # the model's output names 
                 dynamic_axes={'modelInput' : {0 : 'batch_size'},    # variable length axes 
                                'modelOutput' : {0 : 'batch_size'}}) 
"""
bo.export_finn_onnx(model,export_path="xor_network_q.onnx", input_t=dummy_input,export_params=True,
                   opset_version=10,do_constant_folding=True,input_names=['modelInput'],output_names=['modelOutput'],
                    dynamic_axes={'modelInput' : {0 : 'batch_size'},    # variable length axes 
                                'modelOutput' : {0 : 'batch_size'}}) 
"""                    
print(" ") 
print('Model has been converted to ONNX')

 
Model has been converted to ONNX


  training = torch.tensor(training, dtype=torch.bool)
  signed = torch.tensor(signed, dtype=torch.bool)


In [9]:
# check the ONNX model with ONNX’s API
import onnx

onnx_model = onnx.load("xor_network_q.onnx")
onnx.checker.check_model(onnx_model)

In [10]:
from finn.util.visualization import showSrc, showInNetron
from finn.util.basic import make_build_dir

    
showInNetron("xor_network_q.onnx")

Serving 'xor_network_q.onnx' at http://0.0.0.0:8081


In [11]:
from finn.core.modelwrapper import ModelWrapper
model2 = ModelWrapper("xor_network_q.onnx")

In [12]:
# Network Preparation

#Tidy up transformations
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.fold_constants import FoldConstants

model2 = model2.transform(InferShapes())
model2 = model2.transform(FoldConstants())
model2 = model2.transform(GiveUniqueNodeNames())
model2 = model2.transform(GiveReadableTensorNames())
model2 = model2.transform(InferDataTypes())
model2 = model2.transform(RemoveStaticGraphInputs())

model2.save("xor_network_q.onnx")

In [13]:
showInNetron("xor_network_q.onnx")

Stopping http://0.0.0.0:8081
Serving 'xor_network_q.onnx' at http://0.0.0.0:8081


In [14]:
# Adding Pre and Postprocessing
from finn.util.pytorch import ToTensor
from finn.transformation.merge_onnx_models import MergeONNXModels
from finn.core.datatype import DataType
import brevitas.onnx as bo

model2 = ModelWrapper("xor_network_q.onnx")
global_inp_name = model2.graph.input[0].name
ishape = model2.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
chkpt_preproc_name = "xor_network_q.onnx"
bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
model2 = model2.transform(MergeONNXModels(pre_model))
# add input quantization annotation: UINT8 for all BNN-PYNQ models
global_inp_name = model2.graph.input[0].name
model2.set_tensor_datatype(global_inp_name, DataType["UINT8"])

model2.save("xor_network_q.onnx")
showInNetron("xor_network_q.onnx")

Stopping http://0.0.0.0:8081




Serving 'xor_network_q.onnx' at http://0.0.0.0:8081


In [15]:
from finn.transformation.insert_topk import InsertTopK

# postprocessing: insert Top-1 node at the end
model2 = model2.transform(InsertTopK(k=1))
chkpt_name = "xor_network_q.onnx"
# tidy-up again
model2 = model2.transform(InferShapes())
model2 = model2.transform(FoldConstants())
model2 = model2.transform(GiveUniqueNodeNames())
model2 = model2.transform(GiveReadableTensorNames())
model2 = model2.transform(InferDataTypes())
model2 = model2.transform(RemoveStaticGraphInputs())
model2.save(chkpt_name)

showInNetron("xor_network_q.onnx")

Stopping http://0.0.0.0:8081
Serving 'xor_network_q.onnx' at http://0.0.0.0:8081


In [16]:
# Streamlining
from finn.transformation.streamline import Streamline
showSrc(Streamline)

class Streamline(Transformation):
    """Apply the streamlining transform, see arXiv:1709.04060."""

    def apply(self, model):
        streamline_transformations = [
            ConvertSubToAdd(),
            ConvertDivToMul(),
            BatchNormToAffine(),
            ConvertSignToThres(),
            MoveMulPastMaxPool(),
            MoveScalarLinearPastInvariants(),
            AbsorbSignBiasIntoMultiThreshold(),
            MoveAddPastMul(),
            MoveScalarAddPastMatMul(),
            MoveAddPastConv(),
            MoveScalarMulPastMatMul(),
            MoveScalarMulPastConv(),
            MoveAddPastMul(),
            CollapseRepeatedAdd(),
            CollapseRepeatedMul(),
            MoveMulPastMaxPool(),
            AbsorbAddIntoMultiThreshold(),
            FactorOutMulSignMagnitude(),
            AbsorbMulIntoMultiThreshold(),
            Absorb1BitMulIntoMatMul(),
            Absorb1BitMulIntoConv(),
            RoundAndClipThresholds(),
        ]
        for tr

In [17]:
from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
import finn.transformation.streamline.absorb as absorb

model = ModelWrapper("xor_network_q.onnx")
# move initial Mul (from preproc) past the Reshape
model = model.transform(MoveScalarLinearPastInvariants())
# streamline
model = model.transform(Streamline())
model.save("xor_network_q.onnx")
showInNetron("xor_network_q.onnx")

Stopping http://0.0.0.0:8081
Serving 'xor_network_q.onnx' at http://0.0.0.0:8081
