In [None]:
# Imports
from random import randint

import numpy as np
import pandas as pd

from src import *
from src.simulator import SIMULATOR

In [None]:
# Simulator instance used by every cell in this notebook.
sim = SIMULATOR()

# --------------------------------------------
#               KERNEL CONFIGURATION
# --------------------------------------------
# Kernel directory and version string; both are read again by the
# compile and load cells.
kernel_path = './kernels/mmul/'
version = ""

# Values forwarded positionally to sim.kernel_config below.
# NOTE(review): meanings are inferred from the names only -- confirm against
# SIMULATOR.kernel_config before relying on them.
kernel_number = 1
column_usage = [True, False]
nInstrPerCol = 11
imem_add_start = 0
srf_spm_addres = 0

sim.kernel_config(
    column_usage,
    nInstrPerCol,
    imem_add_start,
    srf_spm_addres,
    kernel_number,
)

In [None]:
# --------------------------------------------
#                LOAD SPM DATA
# --------------------------------------------
# SPM layout written by this cell:
# SPM[0]            = SRF (kernel parameters, remaining words are zero)
# SPM[1] -- SPM[32] = B_block_special (one line per column of B, with the
#                     column duplicated once per RC)
# SPM[33]           = C_block (zeros)
# SPM[34]           = A_block
# --------------------------------------------
# Matrix A: 4x32  (nRowsA x nColsA)   -> SPM[34]
# Matrix C: 4x32  (nRowsA x nColsB)   -> SPM[33]
# Matrix B: 32x32 (nColsA x nColsB)   -> SPM[1], ... , SPM[32]
# --------------------------------------------
# SRF[0] = Number of cols on the A block (= nColsA -1 = 31)
# SRF[1] = Number of elements of C on each RC slice (= nElemsOfCPerRC -1 = 31)
# SRF[2] = Line of the SPM where the block of C is stored (= c_spm_line = 33)
# --------------------------------------------

# Defs
nRowsA = 4
nColsA = 32
nColsB = 32
nRCs = 4
srf_spm_line = 0
c_spm_line = 33
a_spm_line = c_spm_line + 1
b_spm_first_line = 1

# Load SRF
srf = [0 for i in range(SPM_NWORDS)]
srf[0] = nColsA -1 # Last index of the number of cols on the A block
nColsC = nColsB
nElemsOfCPerRC = nColsC
last_idx_rc_slice = nElemsOfCPerRC -1
srf[1] = last_idx_rc_slice # Last index of the number of elements of C on each RC slice
# Use the named constant so the SRF entry cannot drift from where C is
# actually stored below.
srf[2] = c_spm_line # Line of the SPM where the block of C is stored
sim.setSPMLine(srf_spm_line, srf.copy())

# Load matrix C (all zeros; the kernel result is read back from this line)
vector_C = [0 for i in range(SPM_NWORDS)]
sim.setSPMLine(c_spm_line, vector_C.copy())

# Load matrix A
# A is filled with the alternating pattern 1, 2, 1, 2, ... in row-major order.
# A random A could be used instead:
#   np.random.randint(1, 15, size=(nRowsA, nColsA)).flatten()
A = [1,2]
vector_A = [A[i%2] for i in range(nRowsA*nColsA)]
sim.setSPMLine(a_spm_line, vector_A.copy())

# Load matrix B
# Entries are drawn from {1, 2} (the upper bound of randint is exclusive), so
# B differs on every run.
matrix_B = np.random.randint(1, 3, size=(nColsA, nColsB))
b_spm_line = b_spm_first_line
for col in range(nColsB):
    # One SPM line per column of B, repeated once per RC.
    vector_aux = np.tile(matrix_B[:,col], nRCs)
    sim.setSPMLine(b_spm_line, vector_aux.copy())
    b_spm_line+=1

# Show the SRF, the first B line, the C block and the A block.
sim.displaySPMLine(srf_spm_line)
sim.displaySPMLine(b_spm_first_line)
sim.displaySPMLine(c_spm_line)
sim.displaySPMLine(a_spm_line)

In [None]:
# --------------------------------------------
#              COMPILE ASM TO HEX
# --------------------------------------------
# Compile the assembly of kernel `kernel_number` found under `kernel_path`.
# NOTE(review): the load cell reads version + "_autogen", so this call
# presumably writes its hex output under that suffix -- confirm against
# SIMULATOR.compileAsmToHex.
sim.compileAsmToHex(kernel_path, kernel_number, version=version)

Finally, we load the kernel into the internal memory of the specialized units and run it.

In [None]:
# --------------------------------------------
#                 LOAD KERNEL
# --------------------------------------------

# Loading needs the hex instructions. If you do not provide them, generate
# them first by compiling the asm.
sim.kernel_load(
    kernel_path,
    version=version + "_autogen",
    kernel_number=kernel_number,
)

# --------------------------------------------
#               SIMULATE EXECUTION
# --------------------------------------------
# Display selections handed to sim.run; every selection is left empty here.
show_lcu, show_lsu, show_mxcu, show_srf = [], [], [], []
show_rcs = [[] for _ in range(4)]
display_ops = [show_lcu, show_lsu, show_mxcu, show_rcs, show_srf]

sim.run(kernel_number, display_ops=display_ops)

We can check the result more rigorously: we define a reference matrix multiplication in Python and verify that its output matches the CGRA output.

In [None]:
def mmul(in_A, in_B, nRowsA, nColsA, nColsB, out):
    """Reference matrix multiplication on flat, row-major buffers.

    Computes C = A x B and writes the result into ``out`` in place.

    Args:
        in_A: Indexable holding A (nRowsA x nColsA) in row-major order.
        in_B: Indexable holding B (nColsA x nColsB) in row-major order.
        nRowsA: Number of rows of A (and of C).
        nColsA: Number of columns of A (and rows of B).
        nColsB: Number of columns of B (and of C).
        out: Mutable indexable with at least nRowsA * nColsB elements; element
            ``i*nColsB + j`` receives C[i][j].

    Returns:
        None. The result is delivered through ``out``.
    """
    for i in range(nRowsA):
        for j in range(nColsB):
            # Dot product of row i of A with column j of B. Uses the builtin
            # sum() instead of a local named `sum` that would shadow it.
            out[i*nColsB + j] = sum(
                in_A[i*nColsA + k] * in_B[k*nColsB + j] for k in range(nColsA)
            )

In [None]:
# Result produced by the CGRA: the SPM line that holds the C block.
disco_cgra_res = sim.getSPMLine(c_spm_line)

# Software reference computed from the same A and B operands.
expected_output = [0] * (nRowsA*nColsB)
mmul(vector_A, matrix_B.flatten(), nRowsA, nColsA, nColsB, expected_output)

# Positions at which the CGRA result differs from the reference.
errors_idx = [
    idx
    for idx, expected in enumerate(expected_output)
    if expected != disco_cgra_res[idx]
]

if not errors_idx:
    print("The result is correct!")
else:
    print("Oops, something went wrong. There are " + str(len(errors_idx)) + " errors.")
    print(errors_idx)
    print("DISCO-CGRA result:")
    sim.displaySPMLine(c_spm_line)
    print("Expected result:")
    print(expected_output)