# Verify Exported ONNX Model in FINN



In [1]:
import onnx 
import torch 

# Import model into FINN with ModelWrapper

The quantized model is initialised in a ModelWrapper to test how the model behaves with its new FINN structure.

In [2]:
import os
from qonnx.core.modelwrapper import ModelWrapper

# Directory holding the exported ONNX files; "" resolves relative to the current working directory.
model_dir = ""
# os.path.join keeps the path valid whether or not model_dir ends with a path separator.
ready_model_filename = os.path.join(model_dir, "cnn_ready_qout.onnx")
# Wrap the exported ONNX file so the graph can be inspected and transformed in the cells below.
model_for_sim = ModelWrapper(ready_model_filename)

FINN provides a number of functions to access information about the model. These can be used to verify that properties such as the model's inputs/outputs and tensor shapes are still correct.

In [3]:
from qonnx.core.datatype import DataType

# Gather name, shape and datatype of the graph's first input and first output tensor.
finnonnx_in_tensor_name = model_for_sim.graph.input[0].name
finnonnx_out_tensor_name = model_for_sim.graph.output[0].name
finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)
finnonnx_model_out_shape = model_for_sim.get_tensor_shape(finnonnx_out_tensor_name)
finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)
finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)

# Print the summary in the order: names, shapes, datatypes.
for report_line in (
    "Input tensor name: %s" % finnonnx_in_tensor_name,
    "Output tensor name: %s" % finnonnx_out_tensor_name,
    "Input tensor shape: %s" % str(finnonnx_model_in_shape),
    "Output tensor shape: %s" % str(finnonnx_model_out_shape),
    "Input tensor datatype: %s" % str(finnonnx_model_in_dt.name),
    "Output tensor datatype: %s" % str(finnonnx_model_out_dt.name),
):
    print(report_line)

# Operator type of every node, in graph order.
node_op_types = [node.op_type for node in model_for_sim.graph.node]
print("List of node operator types in the graph: ")
print(node_op_types)

Input tensor name: global_in
Output tensor name: global_out
Input tensor shape: [1, 2, 16, 16]
Output tensor shape: [1, 4]
Input tensor datatype: INT8
Output tensor datatype: BIPOLAR
List of node operator types in the graph: 
['MultiThreshold', 'Add', 'Mul', 'Conv', 'Mul', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MaxPool', 'Flatten', 'MatMul', 'Mul', 'MultiThreshold']


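Note that the output tensor is reported as BIPOLAR above. For a model exported without a quantized output, the output tensor would (as of yet) be marked as a float32 value, even though we know the output is binary.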

# Network preparation: Tidy-up transformations

Before running the verification, we need to prepare our FINN-ONNX model. In particular, all the intermediate tensors need to have statically defined shapes. To do this, we apply some graph transformations to the model like a kind of "tidy-up" to make it easier to process. 

In [4]:
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.fold_constants import FoldConstants

# Tidy-up passes, applied strictly in this order; every pass returns the
# transformed model, which becomes the input of the next pass.
tidy_up_transforms = (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
    RemoveStaticGraphInputs,
)
for transform_cls in tidy_up_transforms:
    model_for_sim = model_for_sim.transform(transform_cls())

# os.path.join keeps the path valid whether or not model_dir ends with a path separator.
verif_model_filename = os.path.join(model_dir, "cnn-verification.onnx")
model_for_sim.save(verif_model_filename)

# Load the Dataset

The dataset is loaded as before

In [5]:
def filter_strings(lst):
    """Return the entries of ``lst`` that contain none of the digits 3 to 9.

    The relative order of the kept entries is preserved.
    """
    excluded_digits = "3456789"
    kept = []
    for candidate in lst:
        # keep the entry only when every excluded digit is absent from it
        if all(digit not in candidate for digit in excluded_digits):
            kept.append(candidate)
    return kept

In [6]:
import numpy as np
import os as os
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

folder = "../fullPlutoImport"
# os.listdir returns entries in arbitrary order; sort them so the same files
# (and therefore the same verification inputs) are selected on every run.
files = sorted(os.listdir(folder))

filtered_files = filter_strings(files)

factor = 2
noFiles = len(filtered_files)

# The reshape in the loop implies 7800 frames of 128 complex samples per file.
# `factor` consecutive frames are merged into one row, and every complex sample
# is stored as two floats (real, imag).
FRAMES_PER_FILE = 7800
rows_per_file = FRAMES_PER_FILE // factor
samples_per_row = 128 * factor

# Zero-initialised (rather than np.ndarray(...), which leaves memory
# uninitialised) so a row that is never written still holds a defined value.
arr = np.zeros((rows_per_file * noFiles, samples_per_row * 2), dtype=float)
labels = np.zeros((rows_per_file * noFiles, 4))

seed = 0

for idx, npz in enumerate(filtered_files):
    # NOTE(review): from the 21st file onwards the destination window is moved
    # up by one row, which overwrites the last row of the previous file and
    # leaves the final row of `arr`/`labels` unwritten. Kept as in the original
    # code - confirm whether this shift is intentional.
    row_shift = 0 if idx < 20 else -1
    start_idx = idx * rows_per_file + row_shift
    end_idx = start_idx + rows_per_file

    # Context manager closes the underlying .npz file once its arrays are read.
    with np.load(os.path.join(folder, npz)) as a:
        reshaped_arr = a["samples"].reshape(rows_per_file, samples_per_row)

        # Interleave real and imaginary parts: [re0, im0, re1, im1, ...]
        arr[start_idx:end_idx, 0::2] = reshaped_arr.real
        arr[start_idx:end_idx, 1::2] = reshaped_arr.imag
        # Every row of a file carries that file's channel-activity label.
        labels[start_idx:end_idx] = np.tile(a["active_channels"], (rows_per_file, 1))

# Affine map that sends -2 to -128 and +2 to 127 before truncating to int8.
# NOTE(review): assumes the samples lie within [-2, 2]; values outside that
# range do not fit into int8 - confirm against the data.
normalized_array = 255 * (arr + 2) / (4) - 128

normalized_array = normalized_array.astype(np.int8)

ver_arr = TensorDataset(torch.tensor(normalized_array, dtype=torch.float32), torch.tensor(labels, dtype=torch.int8))


# Only the first n_verification_inputs rows are used for the comparison below.
n_verification_inputs = 100
input_tensor = ver_arr.tensors[0][:n_verification_inputs]


# Rebuild CNN

The model is rebuilt in Brevitas, and the same weights as before are restored from its saved state dictionary.

In [7]:
# Hyperparameters of the rebuilt network, consumed by the model definition below.
input_bits = 4  # bit width of the input quantizer (InputQuantizer.bit_width)
a_bits = 4  # bit width of the QuantReLU activation
w_bits = 4  # weight bit width of the QuantConv2d and QuantLinear layers
filters_conv = 16  # number of output channels of the convolution layer

In [8]:
from torch import nn
import brevitas.nn as qnn
from brevitas.quant import IntBias
from brevitas.inject.enum import ScalingImplType
from brevitas.inject.defaults import Int8ActPerTensorFloatMinMaxInit

# Setting seeds for reproducibility
# (both the PyTorch and the NumPy global random number generators are seeded with 0)
torch.manual_seed(0)
np.random.seed(0)

# Quantizer configuration for the network input: derived from
# Int8ActPerTensorFloatMinMaxInit with the bit width, range and scaling
# implementation overridden by the attributes below.
class InputQuantizer(Int8ActPerTensorFloatMinMaxInit):
    bit_width = input_bits  # taken from the configuration cell above
    min_val = -2.0  # lower end of the quantization range
    max_val = 2.0  # upper end of the quantization range
    scaling_impl_type = ScalingImplType.CONST # Fix the quantization range to [min_val, max_val]

# Layer order matters: nn.Sequential names parameters by position, and the
# saved state dict is loaded into exactly this layout in a later cell.
model = nn.Sequential(
    # Input quantization layer
    qnn.QuantHardTanh(act_quant=InputQuantizer),

    # 3x3 convolution from 2 input channels to filters_conv channels;
    # padding=1 keeps the spatial size unchanged
    qnn.QuantConv2d(2, filters_conv, 3, padding=1, weight_bit_width=w_bits, bias=False),
    nn.BatchNorm2d(filters_conv),
    qnn.QuantReLU(bit_width=a_bits),
    # 2x2 max pooling halves height and width
    nn.MaxPool2d(2),
    
    nn.Flatten(),

    # filters_conv*8*8 input features correspond to a 16x16 input after the
    # 2x2 pooling; the layer produces 4 outputs
    qnn.QuantLinear(filters_conv*8*8, 4, weight_bit_width=w_bits, bias=False),
)

In [9]:
# Load the trained weights from the saved state dict file.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs, and avoids executing arbitrary code from the file.
trained_state_dict = torch.load("state_dict_self-trained.pth", weights_only=True)

# strict=False tolerates missing/unexpected keys; the result displayed below
# the cell reports whether all keys matched.
model.load_state_dict(trained_state_dict, strict=False)

  trained_state_dict = torch.load("state_dict_self-trained.pth")


<All keys matched successfully>

Optional: quantize the output.

In [10]:
from brevitas.nn import QuantIdentity

class BipolarForExport(nn.Module):
    """Wrap a model and pass its output through a 1-bit binary quantizer.

    The wrapped model is stored as ``pretrained`` and the quantizer as
    ``qnt_output``.
    """

    def __init__(self, my_pretrained_model):
        super().__init__()
        self.pretrained = my_pretrained_model
        # Binary quantizer with a constant scale over the range [-1.0, 1.0].
        output_quant_kwargs = dict(
            quant_type='binary',
            scaling_impl_type='const',
            bit_width=1,
            min_val=-1.0,
            max_val=1.0,
        )
        self.qnt_output = QuantIdentity(**output_quant_kwargs)

    def forward(self, x):
        """Run the wrapped model on ``x`` and quantize the result to {-1,1}."""
        return self.qnt_output(self.pretrained(x))



# Rebinds `model` to the wrapped version. Running this cell a second time wraps
# the already wrapped model again, so rebuild `model` first before re-running.
model = BipolarForExport(model)

# 4. Compare FINN & Brevitas execution <a id="compare_brevitas"></a>

FINN provides the `execute_onnx` function in `finn.core.onnx_exec` to simulate what happens in FINN with the given model. By executing the model with this function, it can be verified that it behaves in the same manner as the Brevitas model.

In [11]:
import finn.core.onnx_exec as oxe

def inference_with_finn_onnx(current_inp):
    """Execute the FINN-ONNX model on one input and return its output.

    ``current_inp`` is a PyTorch tensor; it is converted to a numpy array and
    reshaped to the shape of the model's first graph input before execution.
    The value of the model's first graph output is returned.
    """
    graph = model_for_sim.graph
    in_name = graph.input[0].name
    out_name = graph.output[0].name
    expected_shape = model_for_sim.get_tensor_shape(in_name)

    # FINN takes numpy arrays keyed by tensor name, shaped like the model
    # input (including the batch dimension).
    np_inp = current_inp.detach().numpy().reshape(expected_shape)
    results = oxe.execute_onnx(model_for_sim, {in_name: np_inp})

    # pick the graph output out of the returned tensor dictionary
    return results[out_name]

To get outputs from the brevitas model, simply run as normal.

In [12]:
def inference_with_brevitas(current_inp):
    """Run the Brevitas model on one input tensor and return its output tensor.

    The global ``model`` is switched to eval mode, and the forward pass runs
    under ``torch.no_grad()`` so that no autograd graph is recorded for what
    is a pure inference call.
    """
    model.eval()
    with torch.no_grad():
        brevitas_output = model(current_inp)

    return brevitas_output

Now the inference helper functions are called for each input and the outputs compared.

In [13]:


import numpy as np
from tqdm import trange

# Progress bar over the verification inputs; its description is updated with
# the running ok/nok counts.
verify_range = trange(n_verification_inputs, desc="FINN execution", position=0, leave=True)
model.eval()

ok = 0
nok = 0

for i in verify_range:
    # take one sample and shape it as (1, 2, 16, 16)
    current_inp = input_tensor[i].reshape((1, 2, 16, 16))
    # the same tensor goes through both execution paths
    brevitas_output = inference_with_brevitas(current_inp).detach().numpy()
    finn_output = inference_with_finn_onnx(current_inp)
    # a sample counts as ok only when every output element is identical
    if (finn_output == brevitas_output).all():
        ok += 1
    else:
        nok += 1
    verify_range.set_description("ok %d nok %d" % (ok, nok))
    verify_range.refresh()



  return super().rename(names)
  return F.conv2d(
ok 100 nok 0: 100%|██████████| 100/100 [00:06<00:00, 16.65it/s]


In [14]:
# Fail loudly unless every verification input produced identical outputs.
# An explicit raise is used instead of `assert` so the check is not stripped
# when Python runs with optimisations enabled.
if ok != n_verification_inputs:
    raise AssertionError("Verification failed. Brevitas and FINN-ONNX execution outputs are NOT identical")
print("Verification succeeded. Brevitas and FINN-ONNX execution outputs are identical")

Verification succeeded. Brevitas and FINN-ONNX execution outputs are identical
