In [1]:
# Let's import the dependencies we need
import numpy as np
import sys
import torch
import dotenv

# Load the variables declared in '../env', overriding any values that are
# already set, so that Verilator can be compiled from inside this notebook.
dotenv.load_dotenv(dotenv_path='../env', override=True)

# Put the parent directory on the module search path so that the `src`
# package can be imported below.
sys.path.insert(1, './../')
import src.hw_versions as hwv
import src.sauria_lib as slib

In [2]:
import os
import subprocess

# Hardware version to build and simulate. The same name is later passed to
# `hwv.get_params`, so it must be one of the versions known to `hw_versions`.
sauria_version = 'int8_32x32'

# Remember where the notebook lives so we can always come back to it.
cwd = os.getcwd()

# The compile script is invoked from inside the Verilator test directory.
os.chdir("../../test/verilator")
try:
    # `with` guarantees the log is flushed and closed even if the call fails.
    with open("verilator_compile.log", "w") as f1:
        compile_status = subprocess.call(
            ["sh", "./compile_sauria.sh", sauria_version], stdout=f1
        )
finally:
    # Restore the working directory no matter what happened above; otherwise
    # every relative path used later in the notebook would silently break.
    os.chdir(cwd)

# Fail loudly instead of running the simulation against a stale/missing build.
if compile_status != 0:
    raise RuntimeError(
        "compile_sauria.sh exited with status %d; "
        "see ../../test/verilator/verilator_compile.log" % compile_status
    )

In [15]:
# Seed PyTorch's global RNG so that the randomly generated tensors (and hence
# every result printed further down) are identical on each Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Convolution options:
C_in = 64       # Input Channels
C_out = 64      # Output Channels
Kh,Kw = 3,3     # Kernel size
s = 1           # Strides
d = 1           # Dilation coefficient
#p = 0          # Padding (UNSUPPORTED ATM!)

# Output tensor shape
Cw = 64         # Output tensor width
Ch = 64         # Output tensor height

# Input tensor shape determined by output tensor shape:
# (span covered by the strided output positions) + (dilated kernel extent) - 1
Aw = (1+s*(Cw-1)) + (1+d*(Kw-1)) - 1
Ah = (1+s*(Ch-1)) + (1+d*(Kh-1)) - 1

# Randomly generate the int8 input tensor.
# NOTE: torch.randint's upper bound is exclusive, so values lie in [-127, 126].
tensor_A_torch = torch.randint(-127,127, (C_in, Ah, Aw), dtype=torch.int8)

# Randomly generate weights and biases (same value range as the inputs)
tensor_B_torch = torch.randint(-127,127, (C_out, C_in, Kh, Kw), dtype=torch.int8)
tensor_bias_torch = torch.randint(-127,127, (C_out, 1, 1), dtype=torch.int8)

# Reference convolution with PyTorch's functional API. The operands are
# promoted to double for the computation; the (C_out, 1, 1) bias broadcasts
# over the spatial dimensions of the result.
tensor_C_torch = tensor_bias_torch + torch.nn.functional.conv2d(
    tensor_A_torch.double(),
    tensor_B_torch.double(),
    stride=s,
    padding=0,
    dilation=d,
)
# Convert the reference result to int32
tensor_C_torch = tensor_C_torch.int()

print(tensor_C_torch.shape)
print(tensor_C_torch[:3,:3,:3])

torch.Size([64, 64, 64])
tensor([[[ 216115,     124, -210426],
         [ 236552,  158716,   98635],
         [ -69940, -107108, -110437]],

        [[ 124946,  156052,  126372],
         [ -39921, -177881, -144948],
         [  71120,   82676,   10924]],

        [[ -45673,  280687, -162275],
         [ -50123,  -28998,   27355],
         [ -72920,   86360, -128622]]], dtype=torch.int32)


In [16]:
# NumPy copy of the input tensor generated with PyTorch
tensor_A = tensor_A_torch.detach().numpy().copy()

# NumPy copy of the randomly generated weights tensor
tensor_B = tensor_B_torch.detach().numpy().copy()

# The bias is applied by preloading it into the output array.
# (This is OPTIONAL! Replicating one value per channel over the whole
#  Ch x Cw plane has a cost, but it is a handy example of data preloading.)
bias_numpy = tensor_bias_torch.detach().numpy().copy()
preload_C = np.empty((C_out, Ch, Cw))   # default dtype (float64), as np.zeros would give
preload_C[...] = bias_numpy.reshape(C_out, 1, 1)

# NumPy copy of the PyTorch reference result, used for the comparison later
tensor_C = tensor_C_torch.detach().numpy().copy()

print(tensor_C.shape)
print(tensor_C[:3, :3, :3])

(64, 64, 64)
[[[0x34c33 0x7c -0x335fa]
  [0x39c08 0x26bfc 0x1814b]
  [-0x11134 -0x1a264 -0x1af65]]

 [[0x1e812 0x26194 0x1eda4]
  [-0x9bf1 -0x2b6d9 -0x23634]
  [0x115d0 0x142f4 0x2aac]]

 [[-0xb269 0x4486f -0x279e3]
  [-0xc3cb -0x7146 0x6adb]
  [-0x11cd8 0x15158 -0x1f66e]]]


In [17]:
# From here on NumPy prints integer array elements in hexadecimal.
# This is a global setting: it affects every array printed afterwards.
hex_int_format = {'int': hex}
np.set_printoptions(formatter=hex_int_format)

# Weights of the first 20 output channels at (c_in=0, kh=0, kw=0), cast to
# uint8 so that negative int8 values show up as their raw byte value.
tensor_B[:20, 0, 0, 0].astype(np.uint8)

array([0x74, 0xb, 0x76, 0x7d, 0x84, 0x79, 0xf9, 0xc, 0xb9, 0x90, 0x0,
       0x75, 0x69, 0xfb, 0x68, 0x86, 0xac, 0xfd, 0x46, 0xde], dtype=uint8)

In [22]:
# Hardware parameters describing the selected version of SAURIA
HW_PARAMS = hwv.get_params(sauria_version)

# Shapes of the tensors to compute, in the order A (input), B (weights), C (output)
tensor_shapes = [operand.shape for operand in (tensor_A, tensor_B, tensor_C)]

# Tiling sizes
TILING_DICT = dict(
    C_tile_shape=[64, 8, 64],   # [C_out, Ch, Cw]
    tile_cin=32,
    X_used=32,
    Y_used=32,
)

# Full description of the convolution to compute, derived from the tensor
# shapes, the tiling and the hardware parameters (with preloading enabled)
CONV_DICT = slib.get_conv_dict(
    tensor_shapes,
    TILING_DICT,
    HW_PARAMS,
    d=d,
    s=s,
    preloads=True,
)

print(CONV_DICT)

{'B_w': 3, 'B_h': 3, 'C_w': 64, 'C_h': 64, 'C_c': 64, 'A_w': 66, 'A_h': 66, 'A_c': 64, 'AB_c': 64, 'd': 1, 's': 1, 'w_til': 64, 'h_til': 8, 'c_til': 32, 'k_til': 64, 'A_w_til': 66, 'A_h_til': 10, 'X_ext_tiles': 1, 'Y_ext_tiles': 8, 'K_ext_tiles': 1, 'C_ext_tiles': 2, 'N_total_tiles': 16, 'B_w_eff': 3, 'B_h_eff': 3, 'X_int_tiles': 2, 'Y_int_tiles': 8, 'K_int_tiles': 2, 'N_cswitch': 32, 'X_used': 32, 'Y_used': 32, 'preload_en': True, 'Dil_pat': 16140901064495857664, 'rows_active': 4294967295, 'cols_active': 4294967295, 'lwoffs': array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc,
       0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f]), 'thres': 0}


In [23]:
from importlib import reload
import src.config_helper as cfg
# Re-import the project modules so that edits made to them on disk are picked
# up without restarting the kernel (same order as before: slib, hwv, cfg).
for module in (slib, hwv, cfg):
    reload(module)

# Run the convolution through slib.Conv2d_SAURIA. `preload_C` carries the
# replicated bias and `tensor_C` is the PyTorch reference result.
# NOTE(review): how the reference is used inside Conv2d_SAURIA is not visible
# from this notebook - confirm in src/sauria_lib.
SAURIA_outputs, SAURIA_stats = slib.Conv2d_SAURIA(
    tensor_A,
    tensor_B,
    preload_C,
    tensor_C,
    CONV_DICT,
    HW_PARAMS,
    generate_vcd=False,
    print_statistics=True,
    silent=False,
)

[[0x50000200 0x4011f]
 [0x50000204 0x20]
 [0x50000400 0x400043]
 [0x50000404 0x2100318]
 [0x50000408 0x52852800]
 [0x5000040c 0x10001000]
 [0x50000410 0x84021000]
 [0x50000414 0x0]
 [0x50000418 0x0]
 [0x5000041c 0xffffff80]
 [0x50000420 0x80403ff]
 [0x50000424 0x1814100c]
 [0x50000428 0x2824201c]
 [0x5000042c 0x3834302c]
 [0x50000430 0x4844403c]
 [0x50000434 0x5854504c]
 [0x50000438 0x6864605c]
 [0x5000043c 0x7874706c]
 [0x50000440 0x7c]
 [0x50000600 0x804800]
 [0x50000604 0x1000004]
 [0x50000608 0x4000400]
 [0x5000060c 0xffffffc0]
 [0x50000610 0x7f]
 [0x50000800 0x400020]
 [0x50000804 0x40000020]
 [0x50000808 0x2000200]
 [0x5000080c 0x80000020]
 [0x50000810 0x1004000]]
29
[16805888, 4194304, 32, 268697664, 545259522, 2147500034, 520093700, 268436480, 272646656, 2483290113, 2, 1090519040, 3506438148, 42008580, 262431, 32, 4194371, 34603800, 1384458240, 268439552, 2214727680, 0, 0, 4294967168, 134480895, 403968012, 673456156, 942944300, 1212432444, 1481920588, 1751408732, 2020896876, 12

In [None]:
# Show the same 3x3x3 corner of both results side by side
corner = np.s_[:3, :3, :3]

print("From Pytorch:")
print(tensor_C[corner])

print("\nFrom SAURIA:")
# Cast the SAURIA result to int32 before printing and comparing
SAURIA_outputs = SAURIA_outputs.astype(np.int32)
print(SAURIA_outputs[corner])

# Mean of the element-wise absolute difference against the reference
print("\nAverage absolute error:")
abs_error = np.abs(SAURIA_outputs - tensor_C)
print(abs_error.mean())

From Pytorch:
[[[0x0 0x0 0x0]
  [0x0 0x0 0x0]
  [0x0 0x0 0x0]]

 [[0x1 0x1 0x1]
  [0x1 0x1 0x1]
  [0x1 0x1 0x1]]

 [[0x2 0x2 0x2]
  [0x2 0x2 0x2]
  [0x2 0x2 0x2]]]

From SAURIA:
[[[0x10 0x10 0x10]
  [0x10 0x10 0x10]
  [0x10 0x10 0x10]]

 [[0x11 0x11 0x11]
  [0x11 0x11 0x11]
  [0x11 0x11 0x11]]

 [[0x12 0x12 0x12]
  [0x12 0x12 0x12]
  [0x12 0x12 0x12]]]

Average absolute error:
8.0


: 

In [None]:
# Largest element-wise absolute difference between SAURIA and the reference
np.max(np.abs(SAURIA_outputs - tensor_C))

183

: 

In [None]:
# Display the full hardware-parameter dictionary obtained from hwv.get_params
HW_PARAMS

{'MainMemory_offset': 0,
 'SAURIA_offset_DMA': 3489660928,
 'CTRL_offset': 1073741824,
 'CORE_offset': 1342177280,
 'DMA_offset': 1610612736,
 'CFG_CON_offset': 512,
 'CFG_IFM_offset': 1024,
 'CFG_WEI_offset': 1536,
 'CFG_PSM_offset': 2048,
 'MEMA_offset': 262144,
 'MEMB_offset': 524288,
 'MEMC_offset': 786432,
 'CFG_AXI_DATA_WIDTH': 32,
 'CFG_AXI_ADDR_WIDTH': 32,
 'MEMA_DEPTH': 2048,
 'MEMB_DEPTH': 2048,
 'MEMC_DEPTH': 1024,
 'DATA_AXI_DATA_WIDTH': 128,
 'DATA_AXI_ADDR_WIDTH': 32,
 'X': 32,
 'Y': 32,
 'DILP_W': 64,
 'PARAMS_W': 8,
 'TH_W': 2,
 'IFM_FIFO_POSITIONS': 5,
 'WEI_FIFO_POSITIONS': 4,
 'FIFO_FILL_CYCLES': 1,
 'IA_W': 8,
 'IB_W': 8,
 'OC_W': 32,
 'OP_TYPE': 0,
 'IA_MANT': 0,
 'IB_MANT': 0,
 'IC_MANT': 0,
 'rounding': 'RNE',
 'approx_comp': False,
 'mul_type': 0,
 'M': 0,
 'add_type': 0,
 'A': 0,
 'MEMA_W': 256,
 'MEMB_W': 256,
 'MEMC_W': 1024,
 'ADRA_W': 11,
 'ADRB_W': 11,
 'ADRC_W': 10,
 'MEMA_N': 32,
 'IFM_WOFS_W': 5,
 'IFM_IDX_W': 17,
 'MEMB_N': 32,
 'WEI_WOFS_W': 5,
 'WEI_

: 

: 