# INT8, 32x32 VERSION EXAMPLE

This notebook follows the same structure as `example_basic`, but shows how to use the accelerator with a different arithmetic type (int8 instead of FP16) and a different array size (32x32 instead of 8x16).

Note that requantization inside the accelerator is not supported (yet). As a result, when the inputs use int8 arithmetic, the output partial sums are 32 bits wide: 16 bits for each int8×int8 product, plus 16 extra bits of headroom to avoid overflow while the products are accumulated.

In [17]:
# Third-party and standard-library dependencies used throughout the notebook
import numpy as np
import sys
import torch
import dotenv

# Load environment variables from the '../env' file (needed to compile the
# Verilator model from here). override=True lets values from that file replace
# variables that are already set in the current environment.
dotenv.load_dotenv('../env', override=True)

# Put the parent directory on the module search path so that the `src`
# package can be imported. This must run BEFORE the two imports below.
sys.path.insert(1, './../') # To find the libraries inside the Python folder
import src.hw_versions as hwv
import src.sauria_lib as slib

In [12]:
import os
import subprocess

# Hardware version to build - see `src/hw_versions.py` (imported as `hwv`)
sauria_version = 'int8_32x32'

# The compile script is invoked with a relative path, so it has to be launched
# from the Verilator test directory. Remember where we started so the notebook's
# working directory can be restored afterwards.
cwd = os.getcwd()

os.chdir("../../test/verilator")
try:
    # The context manager guarantees that the log is flushed and closed, even
    # if launching the script raises.
    with open("verilator_compile.log", "w") as compile_log:
        compile_status = subprocess.call(
            ["sh", "./compile_sauria.sh", sauria_version],
            stdout=compile_log,
        )
finally:
    # Always return to the original directory so later cells (and re-runs of
    # this cell) resolve their relative paths correctly.
    os.chdir(cwd)

# Surface a failed build instead of silently continuing with a stale model.
if compile_status != 0:
    print("WARNING: compile_sauria.sh exited with status %d - "
          "check test/verilator/verilator_compile.log" % compile_status)

In [26]:
# Seed the generator so the random tensors (and therefore every output printed
# in this notebook) are reproducible after Restart Kernel -> Run All.
torch.manual_seed(0)

# Convolution options:
C_in = 64       # Input Channels
C_out = 64      # Output Channels
Kh,Kw = 3,3     # Kernel size
s = 1           # Strides
d = 1           # Dilation coefficient
# Padding is UNSUPPORTED at the moment, so padding=0 is used below.

# Output tensor shape
Cw = 64         # Output tensor width
Ch = 64         # Output tensor height

# Input tensor shape determined by output tensor shape:
# (strided output extent) + (dilated kernel extent) - 1
Aw = (1+s*(Cw-1)) + (1+d*(Kw-1)) - 1
Ah = (1+s*(Ch-1)) + (1+d*(Kh-1)) - 1

# Symmetric int8 range for the random operands. torch.randint treats its upper
# bound as EXCLUSIVE, so INT8_HI + 1 is passed to make +127 reachable.
INT8_LO, INT8_HI = -127, 127

# Randomly generate input tensors
tensor_A_torch = torch.randint(INT8_LO, INT8_HI + 1, (C_in, Ah, Aw), dtype=torch.int8)

# Randomly generate weights and biases
tensor_B_torch = torch.randint(INT8_LO, INT8_HI + 1, (C_out, C_in, Kh, Kw), dtype=torch.int8)
tensor_bias_torch = torch.randint(INT8_LO, INT8_HI + 1, (C_out, 1, 1), dtype=torch.int8)

# Reference convolution with Pytorch. The operands are cast to float64, which
# represents these integer sums exactly (C_in*Kh*Kw = 576 products, each at
# most 127*127 in magnitude), and the result is then converted to int32.
conv_reference = torch.nn.functional.conv2d(
    tensor_A_torch.double(),
    tensor_B_torch.double(),
    stride=s,
    padding=0,
    dilation=d,
)
tensor_C_torch = (tensor_bias_torch + conv_reference).int()

# Show the output shape and a small 3x3x3 corner of the result
print(tensor_C_torch.shape)
print(tensor_C_torch[:3,:3,:3])

torch.Size([64, 64, 64])
tensor([[[-105853,   41891,  -61168],
         [  50246,  161502,  -12996],
         [ -42250,   13467,   39359]],

        [[ -73952,  143520, -172990],
         [-107578,    8750,    1030],
         [  14800,   17017,  -70818]],

        [[ 162033,   31859,  -47837],
         [ -28519, -374222,  137459],
         [ -59825,  130661,   54387]]], dtype=torch.int32)


In [27]:
def _as_numpy(t):
    """Return a NumPy copy of torch tensor `t`, detached from autograd."""
    return np.array(t.detach())

# Activations, weights and the Pytorch reference result, as NumPy arrays.
# The reference is kept so it can be compared against the accelerator output.
tensor_A, tensor_B, tensor_C = (
    _as_numpy(t) for t in (tensor_A_torch, tensor_B_torch, tensor_C_torch)
)

# The bias is applied by preloading it into the output array.
# (This is OPTIONAL! Replicating one value per channel over the whole
#  Ch x Cw plane has a cost, but it is a handy example of data preloading.)
bias_numpy = _as_numpy(tensor_bias_torch)
per_channel_bias = bias_numpy.reshape(C_out, 1, 1).astype(np.float64)
preload_C = np.tile(per_channel_bias, (1, Ch, Cw))

# Shape and a 3x3x3 corner of the reference result
print(tensor_C.shape)
print(tensor_C[:3,:3,:3])

(64, 64, 64)
[[[-0x19d7d 0xa3a3 -0xeef0]
  [0xc446 0x276de -0x32c4]
  [-0xa50a 0x349b 0x99bf]]

 [[-0x120e0 0x230a0 -0x2a3be]
  [-0x1a43a 0x222e 0x406]
  [0x39d0 0x4279 -0x114a2]]

 [[0x278f1 0x7c73 -0xbadd]
  [-0x6f67 -0x5b5ce 0x218f3]
  [-0xe9b1 0x1fe65 0xd473]]]


In [None]:
# Hardware parameters for the selected SAURIA version
HW_PARAMS = hwv.get_params(sauria_version)

# Shapes of the tensors involved, in the order [A, B, C]
tensor_shapes = [t.shape for t in (tensor_A, tensor_B, tensor_C)]

# Tiling sizes
TILING_DICT = dict(
    C_tile_shape=[64, 8, 64],   # [C_out, Ch, Cw]
    tile_cin=64,
    X_used=32,
    Y_used=32,
)

# Full description of the convolution to compute, with preloads enabled
CONV_DICT = slib.get_conv_dict(
    tensor_shapes,
    TILING_DICT,
    HW_PARAMS,
    d=d,
    s=s,
    preloads=True,
)

print(CONV_DICT)

{'B_w': 3, 'B_h': 3, 'C_w': 64, 'C_h': 64, 'C_c': 64, 'A_w': 66, 'A_h': 66, 'A_c': 64, 'AB_c': 64, 'd': 1, 's': 1, 'w_til': 64, 'h_til': 8, 'c_til': 32, 'k_til': 64, 'A_w_til': 66, 'A_h_til': 10, 'X_ext_tiles': 1, 'Y_ext_tiles': 8, 'K_ext_tiles': 1, 'C_ext_tiles': 2, 'N_total_tiles': 16, 'B_w_eff': 3, 'B_h_eff': 3, 'X_int_tiles': 2, 'Y_int_tiles': 8, 'K_int_tiles': 2, 'N_cswitch': 32, 'X_used': 32, 'Y_used': 32, 'preload_en': True, 'Dil_pat': 16140901064495857664, 'rows_active': 4294967295, 'cols_active': 4294967295, 'lwoffs': array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc,
       0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f]), 'thres': 0}


In [31]:
from importlib import reload

import src.config_helper as cfg
import src.data_helper as dh

# Re-import the helper modules so that edits made to them on disk are picked
# up without restarting the kernel (same order as before: slib, hwv, cfg, dh).
for helper_module in (slib, hwv, cfg, dh):
    reload(helper_module)

# Run the convolution on SAURIA. The Pytorch reference `tensor_C` is passed in
# alongside the inputs and the preloaded bias.
# NOTE(review): generate_vcd=True presumably requests a waveform dump - confirm.
SAURIA_outputs, SAURIA_stats = slib.Conv2d_SAURIA(
    tensor_A,
    tensor_B,
    preload_C,
    tensor_C,
    CONV_DICT,
    HW_PARAMS,
    generate_vcd=True,
    print_statistics=True,
    silent=False,
)


              TEST PASSED

****************************************
          SAURIA STATISTICS
****************************************
Total cycles:				213534
Total operations:			301989888
Average Throughput:			1414.25 OP/cycle (69.06 %)

Number of tiles:			16
Core stall cycles:			4352 (2.04 %)
Memory/CGF stall cycles:		61646 (28.87 %)

SAURIA memory capacity (A|B|C):		64.0 | 64.0 | 128.0 [kB]
Utilized memory:			20.625 | 18.0 | 128.0 [kB] (32.23 | 28.12 | 100.00 [%])


In [25]:
# Compare the same 3x3x3 corner of the Pytorch reference and the SAURIA output
corner = (slice(3),) * 3    # same selection as [:3, :3, :3]

print("From Pytorch:")
print(tensor_C[corner])

print("\nFrom SAURIA:")
# Cast to int32 so both arrays share the reference's integer type
SAURIA_outputs = SAURIA_outputs.astype(np.int32)
print(SAURIA_outputs[corner])

print("\nAverage absolute error:")
abs_error = np.abs(SAURIA_outputs - tensor_C)
print(abs_error.mean())

From Pytorch:
[[[0x8032 0x1ec2f -0x1e796]
  [-0x406b 0x8eed -0xac7b]
  [0x1e1e -0x178a -0xee5c]]

 [[0x35439 -0x9405 -0xdc41]
  [-0x14ee0 0x10ac9 0xad54]
  [-0x7265 -0x113e5 -0x42f5]]

 [[0x16ee7 0xe9ea -0x11b82]
  [-0xf875 0x955c -0x8eca]
  [0x7dbd 0x4da5 0x3305d]]]

From SAURIA:
[[[0x8032 0x1ec2f -0x1e796]
  [-0x406b 0x8eed -0xac7b]
  [0x1e1e -0x178a -0xee5c]]

 [[0x35439 -0x9405 -0xdc41]
  [-0x14ee0 0x10ac9 0xad54]
  [-0x7265 -0x113e5 -0x42f5]]

 [[0x16ee7 0xe9ea -0x11b82]
  [-0xf875 0x955c -0x8eca]
  [0x7dbd 0x4da5 0x3305d]]]

Average absolute error:
0.0
