In [1]:
# Third-party and standard-library dependencies used throughout the notebook
import numpy as np
import sys
import torch
import dotenv

# Load environment variables from the '../env' file (needed to compile
# Verilator from here). override=True lets values from that file replace
# variables that are already set in the current environment.
dotenv.load_dotenv('../env', override=True)

# Put the parent folder on the module search path so that the project's
# 'src' package (the libraries inside the Python folder) can be imported.
# This must run before the two 'src.*' imports below.
sys.path.insert(1, './../') # To find the libraries inside Python folder
import src.hw_versions as hwv
import src.sauria_lib as slib

In [2]:
import os
import subprocess

# Version - See 'Python/versions/hw_versions.py'
sauria_version = 'int8_8x16'

# Remember the notebook's working directory so it can always be restored.
cwd = os.getcwd()

# The compile script is invoked from inside the Verilator test folder, and
# its standard output is captured in 'verilator_compile.log' in that folder.
os.chdir("../../test/verilator")
try:
    # The context manager flushes and closes the log file even when the
    # build step raises, so the handle is never leaked.
    with open("verilator_compile.log", "w") as f1:
        compile_status = subprocess.call(["sh", "./compile_sauria.sh", sauria_version], stdout=f1)
finally:
    # Never leave the kernel stranded in the Verilator directory: later cells
    # rely on paths relative to the notebook's own folder.
    os.chdir(cwd)

# A failed build would otherwise go unnoticed until a later cell breaks.
if compile_status != 0:
    print(f"WARNING: compile_sauria.sh exited with status {compile_status}; "
          "check ../../test/verilator/verilator_compile.log")

In [3]:
# ---- Convolution hyper-parameters ----
C_in, C_out = 32, 32    # Input channels, output channels
Kh = Kw = 3             # Kernel height and width
s = 1                   # Strides
d = 1                   # Dilation coefficient
# NOTE: padding is unsupported at the moment, so it is fixed to 0 below.

# ---- Output tensor shape ----
Cw, Ch = 8, 8           # Output tensor width, output tensor height

# The input extent follows from the output extent: the span covered by the
# strided output positions plus the span of the dilated kernel, minus the
# one element they share.
Aw = s*(Cw - 1) + d*(Kw - 1) + 1
Ah = s*(Ch - 1) + d*(Kh - 1) + 1

# ---- Random int8 operands ----
# torch.randint excludes the upper bound, so values lie in [-127, 126].
# Draw order (activations, weights, biases) is kept fixed.
int8_range = (-127, 127)
tensor_A_torch = torch.randint(*int8_range, (C_in, Ah, Aw), dtype=torch.int8)
tensor_B_torch = torch.randint(*int8_range, (C_out, C_in, Kh, Kw), dtype=torch.int8)
tensor_bias_torch = torch.randint(*int8_range, (C_out, 1, 1), dtype=torch.int8)

# ---- Reference result computed by PyTorch ----
# The convolution runs in double precision on the int8 data; the bias is
# broadcast over the spatial dimensions and the sum is cast to int32.
conv_result = torch.nn.functional.conv2d(
    tensor_A_torch.double(),
    tensor_B_torch.double(),
    stride=s,
    padding=0,
    dilation=d,
)
tensor_C_torch = (tensor_bias_torch + conv_result).int()

print(tensor_C_torch.shape)
print(tensor_C_torch[:3,:3,:3])

torch.Size([32, 8, 8])
tensor([[[  57610,   26017,    3319],
         [  80710,  -57428,   17817],
         [ -71833,   48065,  304173]],

        [[ -59665,  -60112,  -98141],
         [ -72596,   47170,   -2885],
         [-159964,  -36923, -240583]],

        [[ -68724, -107775,  -92590],
         [  60002,   -7530,  -13779],
         [ -28472,  -38099,   71122]]], dtype=torch.int32)


In [4]:
# Bring every PyTorch tensor over to numpy (np.array makes an independent copy):
#   tensor_A   -> input tensor, same values as the PyTorch input
#   tensor_B   -> randomly generated weights
#   bias_numpy -> randomly generated biases
#   tensor_C   -> PyTorch result, kept as the reference to compare against
tensor_A, tensor_B, bias_numpy, tensor_C = (
    np.array(t.detach())
    for t in (tensor_A_torch, tensor_B_torch, tensor_bias_torch, tensor_C_torch)
)

# The bias can be added by preloading data into the array: every output
# channel starts from its own bias value, replicated over height and width.
# (This is OPTIONAL! It adds the cost of replicating the data, but it is a
# useful example of data preloading.)
preload_C = np.zeros((C_out, Ch, Cw))
preload_C[...] = bias_numpy.reshape(C_out, 1, 1)

print(tensor_C.shape)
print(tensor_C[:3,:3,:3])

(32, 8, 8)
[[[  57610   26017    3319]
  [  80710  -57428   17817]
  [ -71833   48065  304173]]

 [[ -59665  -60112  -98141]
  [ -72596   47170   -2885]
  [-159964  -36923 -240583]]

 [[ -68724 -107775  -92590]
  [  60002   -7530  -13779]
  [ -28472  -38099   71122]]]


In [5]:
# Print integer array elements in hexadecimal. set_printoptions is global,
# so this also affects arrays printed by the cells that follow.
np.set_printoptions(formatter={'int': hex})

# Peek at the first 20 output channels of the weight at position (0, 0, 0),
# reinterpreted as unsigned bytes so the raw two's-complement pattern shows.
weights_preview = tensor_B[:20, 0, 0, 0]
weights_preview.astype(np.uint8)

array([0x8d, 0xfe, 0xc7, 0xf2, 0xd5, 0x62, 0x62, 0x42, 0x87, 0xd9, 0x49,
       0x4, 0x7b, 0x86, 0x54, 0xf0, 0x73, 0xe4, 0xba, 0xe6], dtype=uint8)

In [12]:
# Hardware parameters describing the selected version of SAURIA
HW_PARAMS = hwv.get_params(sauria_version)

# Shapes of the tensors to compute, in the order A, B, C
tensor_shapes = [t.shape for t in (tensor_A, tensor_B, tensor_C)]

# Tiling sizes
TILING_DICT = {
    'C_tile_shape': [32, 4, 8],     # [C_out, Ch, Cw]
    'tile_cin': 32,
    'X_used': 16,
    'Y_used': 8,
}

# Full description of the convolution to compute, built from the shapes,
# the tiling and the hardware parameters (with preloads enabled)
CONV_DICT = slib.get_conv_dict(
    tensor_shapes,
    TILING_DICT,
    HW_PARAMS,
    d=d,
    s=s,
    preloads=True,
)

print(CONV_DICT)

{'B_w': 3, 'B_h': 3, 'C_w': 8, 'C_h': 8, 'C_c': 32, 'A_w': 10, 'A_h': 10, 'A_c': 32, 'AB_c': 32, 'd': 1, 's': 1, 'w_til': 8, 'h_til': 4, 'c_til': 32, 'k_til': 32, 'A_w_til': 10, 'A_h_til': 6, 'X_ext_tiles': 1, 'Y_ext_tiles': 2, 'K_ext_tiles': 1, 'C_ext_tiles': 1, 'N_total_tiles': 2, 'B_w_eff': 3, 'B_h_eff': 3, 'X_int_tiles': 1, 'Y_int_tiles': 4, 'K_int_tiles': 2, 'N_cswitch': 8, 'X_used': 16, 'Y_used': 8, 'preload_en': True, 'Dil_pat': 16140901064495857664, 'rows_active': 255, 'cols_active': 65535, 'lwoffs': array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7]), 'thres': 0}


In [10]:
from importlib import reload
import src.config_helper as cfg

# Re-execute the project modules (same order as before: slib, hwv, cfg) so
# that the call below uses their current source.
for _module in (slib, hwv, cfg):
    reload(_module)

# Hand the operands, the preload tensor, the PyTorch reference and both
# description dictionaries to SAURIA's Conv2d entry point; VCD generation
# and statistics printing are switched on, silent mode is off.
SAURIA_outputs, SAURIA_stats = slib.Conv2d_SAURIA(
    tensor_A,
    tensor_B,
    preload_C,
    tensor_C,
    CONV_DICT,
    HW_PARAMS,
    generate_vcd=True,
    print_statistics=True,
    silent=False,
)


o_incntlim mapped in:
Start_reg_idx = 0; Start_bit = 0
  End_reg_idx = 0;   End_bit = 14

o_act_reps mapped in:
Start_reg_idx = 0; Start_bit = 15
  End_reg_idx = 0;   End_bit = 28

o_wei_reps mapped in:
Start_reg_idx = 0; Start_bit = 29
  End_reg_idx = 1;   End_bit = 10

o_thres mapped in:
Start_reg_idx = 1; Start_bit = 11
  End_reg_idx = 1;   End_bit = 12

o_xlim mapped in:
Start_reg_idx = 2; Start_bit = 0
  End_reg_idx = 2;   End_bit = 14

o_xstep mapped in:
Start_reg_idx = 2; Start_bit = 15
  End_reg_idx = 2;   End_bit = 29

o_ylim mapped in:
Start_reg_idx = 2; Start_bit = 30
  End_reg_idx = 3;   End_bit = 12

o_ystep mapped in:
Start_reg_idx = 3; Start_bit = 13
  End_reg_idx = 3;   End_bit = 27

o_chlim mapped in:
Start_reg_idx = 3; Start_bit = 28
  End_reg_idx = 4;   End_bit = 10

o_chstep mapped in:
Start_reg_idx = 4; Start_bit = 11
  End_reg_idx = 4;   End_bit = 25

o_til_xlim mapped in:
Start_reg_idx = 4; Start_bit = 26
  End_reg_idx = 5;   End_bit = 8

o_til_xstep mapped in:


In [11]:
# Show the same 3x3x3 corner of both results, then the mean absolute
# difference over the full tensors.
corner = (slice(None, 3),) * 3

print("From Pytorch:", tensor_C[corner], sep="\n")

# SAURIA's result is cast to int32 before it is printed and compared
SAURIA_outputs = SAURIA_outputs.astype(np.int32)
print("\nFrom SAURIA:", SAURIA_outputs[corner], sep="\n")

abs_error = np.abs(SAURIA_outputs - tensor_C)
print("\nAverage absolute error:", abs_error.mean(), sep="\n")

From Pytorch:
[[[0xe10a 0x65a1 0xcf7]
  [0x13b46 -0xe054 0x4599]
  [-0x11899 0xbbc1 0x4a42d]]

 [[-0xe911 -0xead0 -0x17f5d]
  [-0x11b94 0xb842 -0xb45]
  [-0x270dc -0x903b -0x3abc7]]

 [[-0x10c74 -0x1a4ff -0x169ae]
  [0xea62 -0x1d6a -0x35d3]
  [-0x6f38 -0x94d3 0x115d2]]]

From SAURIA:
[[[0xe10a 0x65a1 0xcf7]
  [0x13b46 -0xe054 0x4599]
  [-0x11899 0xbbc1 0x4a42d]]

 [[-0xe911 -0xead0 -0x17f5d]
  [-0x11b94 0xb842 -0xb45]
  [-0x270dc -0x903b -0x3abc7]]

 [[-0x10c74 -0x1a4ff -0x169ae]
  [0xea62 -0x1d6a -0x35d3]
  [-0x6f38 -0x94d3 0x115d2]]]

Average absolute error:
0.0
