# INT8, 32x32 VERSION EXAMPLE

This notebook follows the same structure as `example_basic`, but showcases how to use the accelerator with a different arithmetic (int8 instead of FP16) and array size (32x32 instead of 8x16).

Note that requantization inside the accelerator is not supported yet. Therefore, when the inputs use int8 arithmetic, the output partial sums are 32 bits wide: 16 bits for each int8 x int8 product, plus 16 extra bits of headroom to avoid overflow during the reduction.

In [None]:
# Third-party and standard-library dependencies used throughout the notebook
import numpy as np
import sys
import torch
import dotenv

# Load environment variables from the '../env' file, overriding any value
# already set in the kernel's environment (needed to compile Verilator from here).
dotenv.load_dotenv('../env', override=True)

# Add the parent directory to the module search path so that the `src` package
# can be imported. This must run BEFORE the `src.*` imports below.
sys.path.insert(1, './../') # To find the libraries inside Python folder
import src.hw_versions as hwv
import src.sauria_lib as slib

In [None]:
import os
import subprocess

# Version - See 'Python/versions/hw_versions.py'
sauria_version = 'int8_32x32'

# Working directory of the notebook (kept available for later cells).
cwd = os.getcwd()

# Directory holding the Verilator compile script, relative to the notebook.
verilator_dir = os.path.join("..", "..", "test", "verilator")
compile_log = os.path.join(verilator_dir, "verilator_compile.log")

# Run the compile script for the selected version, sending its stdout to the log.
# - `with` guarantees the log file is flushed and closed even if the call fails.
# - `cwd=` runs the script inside `verilator_dir` without changing the kernel's
#   own working directory, so an error cannot leave the notebook stranded there.
with open(compile_log, "w") as f1:
    compile_status = subprocess.call(
        ["sh", "./compile_sauria.sh", sauria_version],
        stdout=f1,
        cwd=verilator_dir,
    )

# Surface a failed build instead of silently continuing to the simulation.
if compile_status != 0:
    print(f"WARNING: compile_sauria.sh exited with status {compile_status}; see {compile_log}")

In [None]:
# Convolution options:
C_in = 64       # Input Channels
C_out = 64      # Output Channels
Kh,Kw = 3,3     # Kernel size
s = 1           # Strides
d = 1           # Dilation coefficient
#p = 0          # Padding (UNSUPPORTED ATM!)

# Output tensor shape
Cw = 64         # Output tensor width
Ch = 64         # Output tensor height

# Input tensor shape determined by output tensor shape:
# (strided output extent) + (dilated kernel extent) - 1, with no padding
Aw = (1+s*(Cw-1)) + (1+d*(Kw-1)) - 1
Ah = (1+s*(Ch-1)) + (1+d*(Kh-1)) - 1

# Fix the random seed so that "Restart & Run All" reproduces the same tensors
SEED = 0
torch.manual_seed(SEED)

# Randomly generate input tensors
# (torch.randint excludes the upper bound, so values lie in [-127, 126])
tensor_A_torch = torch.randint(-127,127, (C_in, Ah, Aw), dtype=torch.int8)

# Randomly generate weights and biases directly as int8 tensors
# (no torch.nn.Conv2d layer object is used)
tensor_B_torch = torch.randint(-127,127, (C_out, C_in, Kh, Kw), dtype=torch.int8)
tensor_bias_torch = torch.randint(-127,127, (C_out, 1, 1), dtype=torch.int8)

# Perform the reference convolution with Pytorch: operands are converted to
# double for conv2d, the (C_out,1,1) bias is broadcast-added, and the result
# is converted to 32-bit integers.
tensor_C_torch = tensor_bias_torch + torch.nn.functional.conv2d(tensor_A_torch.double(),tensor_B_torch.double(),stride=s,padding=0,dilation=d)
tensor_C_torch = tensor_C_torch.int()

# Print the result shape and a small corner of the result
print(tensor_C_torch.shape)
print(tensor_C_torch[:3,:3,:3])

In [None]:
# Numpy copies of the torch tensors: input activations, weights, bias and the
# Pytorch reference result (the latter is used for comparison later on)
tensor_A, tensor_B, bias_numpy, tensor_C = (
    np.array(torch_tensor.detach())
    for torch_tensor in (tensor_A_torch, tensor_B_torch, tensor_bias_torch, tensor_C_torch)
)

# The bias is applied by preloading it into the array: every output position
# of a channel starts from that channel's bias value.
# (This is OPTIONAL! It adds the cost of replicating the data!)
# (However, it is useful as an example of data preloading)
preload_C = np.zeros([C_out,Ch,Cw])
preload_C += np.reshape(bias_numpy,[C_out,1,1])   # broadcast over Ch and Cw

# Show the shape and a small corner of the reference result
print(tensor_C.shape)
print(tensor_C[:3,:3,:3])

In [None]:
# Show the first 20 entries along the first axis of the weights (remaining
# indices fixed at 0), reinterpreted as unsigned bytes and formatted in hex.
# The hex formatter is scoped with a context manager so it does not leak into
# the printouts of later cells (np.set_printoptions would change them globally).
with np.printoptions(formatter={'int': hex}):
    print(repr(tensor_B[:20,0,0,0].astype(np.uint8)))

In [None]:
# Hardware parameters describing the selected version of SAURIA
HW_PARAMS = hwv.get_params(sauria_version)

# Shapes of the tensors to compute: input, weights and output, in that order
tensor_shapes = [operand.shape for operand in (tensor_A, tensor_B, tensor_C)]

# Tiling sizes
TILING_DICT = dict(
    C_tile_shape=[64,8,64],     #[C_out, Ch, Cw]
    tile_cin=32,
    X_used=32,
    Y_used=32,
)

# Full description of the convolution to compute
CONV_DICT = slib.get_conv_dict(
    tensor_shapes,
    TILING_DICT,
    HW_PARAMS,
    d=d,
    s=s,
    preloads=True,
)

print(CONV_DICT)

In [None]:
from importlib import reload
import src.config_helper as cfg

# Reload the project modules (same order as before: slib, hwv, cfg) so that
# edits made to them on disk are picked up without restarting the kernel
for _project_module in (slib, hwv, cfg):
    reload(_project_module)

# Run the convolution on SAURIA; the Pytorch result is passed in as tensor_C
SAURIA_outputs, SAURIA_stats = slib.Conv2d_SAURIA(
    tensor_A,
    tensor_B,
    preload_C,
    tensor_C,
    CONV_DICT,
    HW_PARAMS,
    generate_vcd=False,
    print_statistics=True,
    silent=False,
)

In [None]:
# Same 3x3x3 corner is shown for both results
_corner = (slice(None, 3),) * 3

# Reference result
print("From Pytorch:")
print(tensor_C[_corner])

# Accelerator result, converted to 32-bit integers
print("\nFrom SAURIA:")
SAURIA_outputs = SAURIA_outputs.astype(np.int32)
print(SAURIA_outputs[_corner])

# Mean of the element-wise absolute difference over the whole tensor
print("\nAverage absolute error:")
_abs_error = np.abs(SAURIA_outputs - tensor_C)
print(_abs_error.mean())