<a href="https://colab.research.google.com/github/aryamanpathak2022/Cnn_models/blob/main/C10_TO_VERILOG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
!pip install nngen numpy
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim




#PYTORCH MODEL

In [19]:
class ComplexCNN(nn.Module):
     def __init__(self):
        super(ComplexCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1) # (N, 3, 32, 32) -> (N, 64, 32, 32)
        self.bn1 = nn.BatchNorm2d(64)                          # (N, 64, 32, 32) -> (N, 64, 32, 32)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1) # (N, 64, 32, 32) -> (N, 128, 32, 32)
        self.bn2 = nn.BatchNorm2d(128)                         # (N, 128, 32, 32) -> (N, 128, 32, 32)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1) # (N, 128, 32, 32) -> (N, 256, 32, 32)
        self.bn3 = nn.BatchNorm2d(256)                         # (N, 256, 32, 32) -> (N, 256, 32, 32)
        self.pool = nn.MaxPool2d(2, 2)                         # (N, 256, 32, 32) -> (N, 256, 16, 16)
        self.fc1 = nn.Linear(256 * 4 * 4, 1024)                # (N, 256 * 4 * 4) -> (N, 1024)
        self.fc2 = nn.Linear(1024, 512)                        # (N, 1024) -> (N, 512)
        self.fc3 = nn.Linear(512, 10)                          # (N, 512) -> (N, 10)
        self.dropout = nn.Dropout(0.5)

     def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))         # (N, 3, 32, 32) -> (N, 64, 32, 32) -> (N, 64, 16, 16)
        x = self.pool(F.relu(self.bn2(self.conv2(x))))         # (N, 64, 16, 16) -> (N, 128, 16, 16) -> (N, 128, 8, 8)
        x = self.pool(F.relu(self.bn3(self.conv3(x))))         # (N, 128, 8, 8) -> (N, 256, 8, 8) -> (N, 256, 4, 4)
        x = x.view(-1, 256 * 4 * 4)                            # (N, 256, 4, 4) -> (N, 256 * 4 * 4)
        x = F.relu(self.fc1(x))                                # (N, 256 * 4 * 4) -> (N, 1024)
        x = self.dropout(x)                                    # (N, 1024) -> (N, 1024)
        x = F.relu(self.fc2(x))                                # (N, 1024) -> (N, 512)
        x = self.fc3(x)                                        # (N, 512) -> (N, 10)
        return x

#NNGEN MODEL DEFINATION


In [20]:
from __future__ import absolute_import
from __future__ import print_function

import numpy as np
import nngen as ng
import torch
import torch.nn.functional as F
from torch import nn

#  NNgen data types
act_dtype = ng.int16
weight_dtype = ng.int16
bias_dtype = ng.int16
scale_dtype = ng.int16

# Input placeholder
input_layer = ng.placeholder(dtype=act_dtype, shape=(1, 32, 32, 3), name='input_layer')

# Layer 0: conv2d, batchnorm, relu, max_pool
w0 = ng.variable(dtype=weight_dtype, shape=(64, 3, 3, 3), name='w0')
b0 = ng.variable(dtype=bias_dtype, shape=(w0.shape[0],), name='b0')
s0 = ng.variable(dtype=scale_dtype, shape=(w0.shape[0],), name='s0')

a0 = ng.conv2d(input_layer, w0, strides=(1, 1, 1, 1), bias=b0, scale=s0, act_func=ng.relu, sum_dtype=ng.int64)

a0 = ng.max_pool_serial(a0, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1))

# Layer 1: conv2d, batchnorm, relu, max_pool
w1 = ng.variable(weight_dtype, shape=(128, 3, 3, a0.shape[-1]), name='w1')
b1 = ng.variable(bias_dtype, shape=(w1.shape[0],), name='b1')
s1 = ng.variable(scale_dtype, shape=(w1.shape[0],), name='s1')

a1 = ng.conv2d(a0, w1, strides=(1, 1, 1, 1), bias=b1, scale=s1, act_func=ng.relu, sum_dtype=ng.int64)

a1 = ng.max_pool_serial(a1, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1))

# Layer 2: conv2d, batchnorm, relu, max_pool
w2 = ng.variable(weight_dtype, shape=(256, 3, 3, a1.shape[-1]), name='w2')
b2 = ng.variable(bias_dtype, shape=(w2.shape[0],), name='b2')
s2 = ng.variable(scale_dtype, shape=(w2.shape[0],), name='s2')

a2 = ng.conv2d(a1, w2, strides=(1, 1, 1, 1), bias=b2, scale=s2, act_func=ng.relu, sum_dtype=ng.int64)
a2 = ng.max_pool_serial(a2, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1))

# Flatten the output for fully connected layers
a2_flat = ng.reshape(a2, [1, -1])

# Layer 3: full-connection, relu
w3 = ng.variable(weight_dtype, shape=(1024, a2_flat.shape[-1]), name='w3')
b3 = ng.variable(bias_dtype, shape=(w3.shape[0],), name='b3')
s3 = ng.variable(scale_dtype, shape=(w3.shape[0],), name='s3')

a3 = ng.matmul(a2_flat, w3, bias=b3, scale=s3, transposed_b=True, act_func=ng.relu, sum_dtype=ng.int64)

# Layer 4: full-connection, relu
w4 = ng.variable(weight_dtype, shape=(512, a3.shape[-1]), name='w4')
b4 = ng.variable(bias_dtype, shape=(w4.shape[0],), name='b4')
s4 = ng.variable(scale_dtype, shape=(w4.shape[0],), name='s4')

a4 = ng.matmul(a3, w4, bias=b4, scale=s4, transposed_b=True, act_func=ng.relu, sum_dtype=ng.int64)

# Layer 5: full-connection (output layer)
w5 = ng.variable(weight_dtype, shape=(10, a4.shape[-1]), name='w5')
b5 = ng.variable(bias_dtype, shape=(w5.shape[0],), name='b5')
s5 = ng.variable(scale_dtype, shape=(w5.shape[0],), name='s5')
print(w5.value)

output_layer = ng.matmul(a4, w5, bias=b5, scale=s5, transposed_b=True, name='output_layer', sum_dtype=ng.int64)


None


#WEIGHTS

In [21]:

# Assuming you have a trained PyTorch model
pytorch_model = ComplexCNN()
pytorch_model.load_state_dict(torch.load('cifar_net.pth'))  # Loading  trained model parameters

# Assigning  weights to NNgen variables
w0.set_value(pytorch_model.conv1.weight.data.numpy().astype(np.int64))
b0.set_value(pytorch_model.bn1.bias.data.numpy().astype(np.int64))
s0.set_value(np.ones(s0.shape, dtype=np.int64))

w1.set_value(pytorch_model.conv2.weight.data.numpy().astype(np.int64))
b1.set_value(pytorch_model.bn2.bias.data.numpy().astype(np.int64))
s1.set_value(np.ones(s1.shape, dtype=np.int64))

w2.set_value(pytorch_model.conv3.weight.data.numpy().astype(np.int64))
b2.set_value(pytorch_model.bn3.bias.data.numpy().astype(np.int64))
s2.set_value(np.ones(s2.shape, dtype=np.int64))

w3.set_value(pytorch_model.fc1.weight.data.numpy().astype(np.int64).transpose())
b3.set_value(pytorch_model.fc1.bias.data.numpy().astype(np.int64))
s3.set_value(np.ones(s3.shape, dtype=np.int64))

w4.set_value(pytorch_model.fc2.weight.data.numpy().astype(np.int64).transpose())
b4.set_value(pytorch_model.fc2.bias.data.numpy().astype(np.int64))
s4.set_value(np.ones(s4.shape, dtype=np.int64))

w5.set_value(pytorch_model.fc3.weight.data.numpy().astype(np.int64).transpose())
b5.set_value(pytorch_model.fc3.bias.data.numpy().astype(np.int64))
s5.set_value(np.ones(s5.shape, dtype=np.int64))

print(output_layer.value)


None


In [22]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [34]:
pytorch_model = ComplexCNN()
pytorch_model.load_state_dict(torch.load('cifar_net.pth'))

# Print the weights to ensure they are loaded correctly
state_dict = pytorch_model.state_dict()
for name, param in state_dict.items():
    print(f"{name}: {param}")

# Ensure that the weights are not zero after loading
print("Checking conv1 weights after loading:")
print(pytorch_model.conv1.weight.data)

# Assign quantized weights to the NNgen variables
def quantize_weights(weight_data, width):
    weight_data = np.clip(weight_data, -5.0, 5.0)
    weight_data = weight_data * (2.0 ** (width - 1) - 1) / 5.0
    return np.round(weight_data).astype(np.int64)

# Example quantization for conv1 weights
w0_value = quantize_weights(pytorch_model.conv1.weight.data.numpy(), 8)
print("Quantized conv1 weights:")
print(w0_value)

# Assign values to NNgen variables
w0.set_value(w0_value)
b0.set_value(pytorch_model.bn1.bias.data.numpy().astype(np.int64))
s0.set_value(np.ones(s0.shape, dtype=np.int64))

# Similarly, assign and check weights for other layers
w1.set_value(quantize_weights(pytorch_model.conv2.weight.data.numpy(), 8))
b1.set_value(pytorch_model.bn2.bias.data.numpy().astype(np.int64))
s1.set_value(np.ones(s1.shape, dtype=np.int64))

w2.set_value(quantize_weights(pytorch_model.conv3.weight.data.numpy(), 8))
b2.set_value(pytorch_model.bn3.bias.data.numpy().astype(np.int64))
s2.set_value(np.ones(s2.shape, dtype=np.int64))

w3.set_value(quantize_weights(pytorch_model.fc1.weight.data.numpy(), 8).transpose())
b3.set_value(pytorch_model.fc1.bias.data.numpy().astype(np.int64))
s3.set_value(np.ones(s3.shape, dtype=np.int64))

w4.set_value(quantize_weights(pytorch_model.fc2.weight.data.numpy(), 8).transpose())
b4.set_value(pytorch_model.fc2.bias.data.numpy().astype(np.int64))
s4.set_value(np.ones(s4.shape, dtype=np.int64))

w5.set_value(quantize_weights(pytorch_model.fc3.weight.data.numpy(), 8).transpose())
b5.set_value(pytorch_model.fc3.bias.data.numpy().astype(np.int64))
s5.set_value(np.ones(s5.shape, dtype=np.int64))

# Check the NNgen variables
print("NNgen variable w0 values:")
print(w0.value)

conv1.weight: tensor([[[[ 0.1532,  0.2262,  0.1518],
          [-0.1939, -0.2444, -0.0565],
          [-0.1329, -0.2885, -0.1596]],

         [[-0.0299,  0.2585,  0.1343],
          [-0.1591,  0.1495,  0.2431],
          [-0.1123, -0.0622, -0.0253]],

         [[-0.0038, -0.0171,  0.0136],
          [ 0.0598,  0.1440, -0.1341],
          [ 0.1565, -0.1545,  0.0560]]],


        [[[-0.0560, -0.2677, -0.0152],
          [-0.3005, -0.2657, -0.2643],
          [-0.1437, -0.1792, -0.2201]],

         [[ 0.1807,  0.3710,  0.0316],
          [ 0.4806,  0.6360,  0.4813],
          [ 0.2348,  0.3678,  0.3855]],

         [[-0.0407, -0.2538,  0.0169],
          [-0.3217, -0.2495, -0.3543],
          [-0.0284, -0.4016, -0.1158]]],


        [[[ 0.0054,  0.0565,  0.0595],
          [ 0.1971, -0.0149, -0.2864],
          [-0.0724,  0.1028, -0.0829]],

         [[ 0.1058,  0.2043, -0.2594],
          [ 0.2741, -0.0642, -0.1653],
          [ 0.1833, -0.0568, -0.2118]],

         [[ 0.2582, -0.0627, -

#Random parameters

In [24]:
# import numpy as np
# import torch


# # Assign quantized weights to the NNgen operators
# # Layer 0
# w0_value = np.clip(pytorch_model.conv1.weight.data.numpy(), -5.0, 5.0)
# w0_value = (w0_value * (2.0 ** (weight_dtype.width - 1) - 1) / 5.0).astype(np.int64)
# w0.set_value(w0_value)

# b0_value = np.clip(pytorch_model.bn1.bias.data.numpy(), -5.0, 5.0)
# b0_value = (b0_value * (2.0 ** (bias_dtype.width - 1) - 1) / 5.0 / 100.0).astype(np.int64)
# b0.set_value(b0_value)

# # Layer 1
# w1_value = np.clip(pytorch_model.conv2.weight.data.numpy(), -5.0, 5.0)
# w1_value = (w1_value * (2.0 ** (weight_dtype.width - 1) - 1) / 5.0).astype(np.int64)
# w1.set_value(w1_value)

# b1_value = np.clip(pytorch_model.bn2.bias.data.numpy(), -5.0, 5.0)
# b1_value = (b1_value * (2.0 ** (bias_dtype.width - 1) - 1) / 5.0 / 100.0).astype(np.int64)
# b1.set_value(b1_value)

# # Layer 2
# w2_value = np.clip(pytorch_model.conv3.weight.data.numpy(), -5.0, 5.0)
# w2_value = (w2_value * (2.0 ** (weight_dtype.width - 1) - 1) / 5.0).astype(np.int64)
# w2.set_value(w2_value)

# b2_value = np.clip(pytorch_model.bn3.bias.data.numpy(), -5.0, 5.0)
# b2_value = (b2_value * (2.0 ** (bias_dtype.width - 1) - 1) / 5.0 / 100.0).astype(np.int64)
# b2.set_value(b2_value)

# # Layer 3
# w3_value = np.clip(pytorch_model.fc1.weight.data.numpy(), -5.0, 5.0)
# w3_value = (w3_value * (2.0 ** (weight_dtype.width - 1) - 1) / 5.0).astype(np.int64)
# w3.set_value(w3_value)

# b3_value = np.clip(pytorch_model.fc1.bias.data.numpy(), -5.0, 5.0)
# b3_value = (b3_value * (2.0 ** (bias_dtype.width - 1) - 1) / 5.0 / 100.0).astype(np.int64)
# b3.set_value(b3_value)

# # Layer 4
# w4_value = np.clip(pytorch_model.fc2.weight.data.numpy(), -5.0, 5.0)
# w4_value = (w4_value * (2.0 ** (weight_dtype.width - 1) - 1) / 5.0).astype(np.int64)
# w4.set_value(w4_value)

# b4_value = np.clip(pytorch_model.fc2.bias.data.numpy(), -5.0, 5.0)
# b4_value = (b4_value * (2.0 ** (bias_dtype.width - 1) - 1) / 5.0 / 100.0).astype(np.int64)
# b4.set_value(b4_value)

# # Layer 5
# w5_value = np.clip(pytorch_model.fc3.weight.data.numpy(), -5.0, 5.0)
# w5_value = (w5_value * (2.0 ** (weight_dtype.width - 1) - 1) / 5.0).astype(np.int64)
# w5.set_value(w5_value)

# b5_value = np.clip(pytorch_model.fc3.bias.data.numpy(), -5.0, 5.0)
# b5_value = (b5_value * (2.0 ** (bias_dtype.width - 1) - 1) / 5.0 / 100.0).astype(np.int64)
# b5.set_value(b5_value)
# output_layer = ng.matmul(a4, w5, bias=b5, scale=s5, transposed_b=True, name='output_layer', sum_dtype=ng.int64)

# # Print a message indicating successful completion
# print("Quantized weights assigned to all layers successfully.")


Quantized weights assigned to all layers successfully.


#HARDWARE ATTRIBUTES


In [35]:
# conv2d, matmul
# par_ich: parallelism in input-channel
# par_och: parallelism in output-channel
# par_col: parallelism in pixel column
# par_row: parallelism in pixel row
# cshamt_out: right shift amount after applying bias/scale

par = 2  # Example value for parallelism
value_ram_size = 1024  # Example value for value RAM size
out_ram_size = 512  # Example value for output RAM size

par_ich = 2
par_och = 2
cshamt_out = weight_dtype.width + 1

a0.attribute(par=par, value_ram_size=value_ram_size, out_ram_size=out_ram_size)
a1.attribute(par=par, value_ram_size=value_ram_size, out_ram_size=out_ram_size)
a2.attribute(par=par, value_ram_size=value_ram_size, out_ram_size=out_ram_size)
output_layer.attribute(par_ich=par_ich, par_och=par_och,
                       cshamt_out=weight_dtype.width + 1)

# max_pool
# par: parallelism in in/out channel



#TESTING AND VERIFICATION

In [36]:
input_layer_value = np.random.normal(size=input_layer.length).reshape(input_layer.shape)
input_layer_value = np.clip(input_layer_value, -5.0, 5.0)
input_layer_value = input_layer_value * (2.0 ** (input_layer.dtype.width - 1) - 1) / 5.0
input_layer_value = np.round(input_layer_value).astype(np.int64)

print(output_layer.value)
eval_outs = ng.eval([output_layer], input_layer=input_layer_value)
output_layer_value = eval_outs[0]

print(output_layer_value)


None
[[ 3 -4  1 -2 -6 -6  0  4 -1 -7]]


In [27]:
print([output_layer])

[<nngen.operator.matmul.matmul object at 0x7d67a1937dc0>]


#VERILOG CONVERSION

In [32]:
silent = False
axi_datawidth = 32

param_filename = 'complexcnn.npy'
chunk_size = 32

# Export the parameters
param_data = ng.export_ndarray([output_layer], chunk_size)

print(param_data)
np.save(param_filename, param_data)

# to Veriloggen object
try:
    targ = ng.to_veriloggen([output_layer], 'complexcnn', silent=silent, config={'maxi_datawidth': axi_datawidth})
    print("Veriloggen object generated successfully")
except Exception as e:
    print("An error occurred during Veriloggen generation:")
    print(str(e))


[235   3 202 ...   0   0   0]
NNgen: Neural Network Accelerator Generator (version 1.3.4)
[Configuration]
(AXI Master Interface)
  Data width   : 32
  Address width: 32
(AXI Slave Interface)
  Data width   : 32
  Address width: 32
[Schedule Table]
(Stage 0)
(Stage 1)
  <conv2d None dtype:int16 shape:(1, 32, 32, 64) strides:(1, 1, 1, 1) padding:'SAME'-(1, 1, 1, 1) bias:(64,) scale:(64,) act_func:relu sum_dtype:int64 concur_och:8 stationary:filter keep_input default_addr:10205632 g_index:0 l_index:1 word_alignment:2 aligned_shape:(1, 32, 32, 64) scale_factor:1.000000>
  | <placeholder input_layer dtype:int16 shape:(1, 32, 32, 3) default_addr:64 g_index:2 word_alignment:2 aligned_shape:(1, 32, 32, 4) scale_factor:1.000000>
  | <variable w0 dtype:int16 shape:(64, 3, 3, 3) default_addr:8256 g_index:3 word_alignment:2 aligned_shape:(64, 3, 3, 4) scale_factor:1.000000>
  | <variable b0 dtype:int16 shape:(64,) default_addr:8256 g_index:3 word_alignment:2 aligned_shape:(64,) scale_factor:1.0000

In [33]:
print(param_data)

[235   3 202 ...   0   0   0]


In [29]:
import math
from veriloggen import *
import veriloggen.types.axi as axi
import veriloggen.thread as vthread


chunk_size = 64
outputfile = 'complex.out'
filename = 'complexcnn.v'

param_bytes = len(param_data)

variable_addr = int(
    math.ceil((input_layer.addr + input_layer.memory_size) / chunk_size)) * chunk_size
check_addr = int(math.ceil((variable_addr + param_bytes) / chunk_size)) * chunk_size
tmp_addr = int(math.ceil((check_addr + output_layer.memory_size) / chunk_size)) * chunk_size

memimg_datawidth = 32
mem = np.zeros([1024 * 1024 * 256 // memimg_datawidth], dtype=np.int64)
mem = mem + [100]

# placeholder
axi.set_memory(mem, input_layer_value, memimg_datawidth,
               act_dtype.width, input_layer.addr,
               max(int(math.ceil(axi_datawidth / act_dtype.width)), par_ich))

# parameters (variable and constant)
axi.set_memory(mem, param_data, memimg_datawidth,
               8, variable_addr)

# verification data
axi.set_memory(mem, output_layer_value, memimg_datawidth,
               act_dtype.width, check_addr,
               max(int(math.ceil(axi_datawidth / act_dtype.width)), par_och))

# test controller
m = Module('test')
params = m.copy_params(targ)
ports = m.copy_sim_ports(targ)
clk = ports['CLK']
resetn = ports['RESETN']
rst = m.Wire('RST')
rst.assign(Not(resetn))

# AXI memory model (skipped for brevity)

# AXI-Slave controller (skipped for brevity)

# timer (skipped for brevity)

def ctrl():
  pass
    # Control logic (skipped for brevity)

th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
fsm = th.start()

uut = m.Instance(targ, 'uut',
                 params=m.connect_params(targ),
                 ports=m.connect_ports(targ))

# Output Verilog code
if filename is not None:
    m.to_verilog(filename)
    print("verilog file generated successfully")

verilog file generated successfully


In [37]:
# Load the .npy file
data = np.load('complexcnn.npy')

# Display the data
print(data)
print(dir(output_layer))
matmul_obj=output_layer
print("Shape:", matmul_obj.shape)
print("Data type:", matmul_obj.dtype)
print("Arguments (input tensors):", matmul_obj.value)

import torch

# Load the .pth file
file_path = 'cifar_net.pth'
data = torch.load(file_path)

# Print the contents of the .pth file
print(data)


[235   3 202 ...   0   0   0]
['Cparam', 'CparamWire', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__intrinsics__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__sub_str__', '__subclasshook__', '__weakref__', '_collect_arg_numerics', '_collect_numerics', '_conv2d__set_latency', '_name', 'act_bat_step', 'act_func', 'act_func_index', 'act_num_col', 'act_num_row', 'act_offset_values', 'act_read_block', 'act_read_size', 'act_read_step', 'act_row_step', 'add_alignment_request', 'add_consumer', 'addr', 'aligned_length', 'aligned_shape', 'aligned_size', 'arg_objaddrs', 'args', 'args_dict', 'asymmetric_clip', 'attribute', 'bias_num', 'bias_ram_size', 'bias_scala', 'cached_control', 'cached_ram_set', 'cached_stream', 'chain_head', 'check_ram_requirements', 'clk', 'col_select_