In [None]:
import os
import sys
# Directory the notebook is run from; later cells derive the project path from it.
cur_dir = os.getcwd()
# Four levels above the working directory -- presumably the ARIES repository
# root that holds the `frontend` package imported right after this.
aries_path = cur_dir + "/../../../../"
# Guard the append so re-running this cell does not pile up duplicate entries.
if aries_path not in sys.path:
    sys.path.append(aries_path)
from frontend import *
from IPython import get_ipython

In [None]:
# Vector Add: C[i0] = A[i0] + B[i0]
I = 512  # length of the A, B and C vectors
TI = 32  # number of elements processed per tile
grid = (I // TI, ) # grid must be a tuple; one entry per tiled dimension (I // TI tiles)

In [None]:
# Per-tile compute kernel: for each of the TI positions, writes the sum of the
# corresponding TileA and TileB elements into TileC.
# NOTE(review): this cell's source is later handed to sch.build as text, so keep
# documentation in `#` comments rather than a docstring unless the ARIES
# frontend is confirmed to accept docstrings.
@task_kernel()
def kernel_add(TileA: float32[TI], TileB: float32[TI], TileC: float32[TI]):
    for i0 in range(0, TI):
        # Plain overwrite (not an accumulation) of the output element.
        TileC[i0] = TileA[i0] + TileB[i0]

In [None]:
# Per-tile task: loads one TI-element slice of A and B, runs kernel_add on the
# two slices, and stores the kernel output into the same slice of C.
@task_tile()
def vadd(A: float32[I], B: float32[I], C: float32[I], **kwargs):
    # Index of the tile handled by this invocation, taken from kwargs
    # (presumably populated by the vadd[grid](...) launch -- confirm against
    # aries.tile_ranks).
    i = aries.tile_ranks(**kwargs)

    # Index range covered by this tile along the single dimension I
    ti = aries.arange(i*TI, (i+1)*TI)  # I tile range
    
    # Tile-sized float32 buffers for the two inputs and the output.
    # NOTE(review): L1_A and L1_B are rebound by the aries.load calls below;
    # the allocations are presumably still required by the ARIES frontend, so
    # confirm before removing them.
    L1_A = aries.buffer((TI, ), "float32")
    L1_B = aries.buffer((TI, ), "float32")
    L1_C = aries.buffer((TI, ), "float32")
    
    # Fetch this tile's slice of each input, compute, then write the result
    # back to the matching slice of C.
    L1_A = aries.load(A, (ti, ))
    L1_B = aries.load(B, (ti, ))
    kernel_add(L1_A, L1_B, L1_C)
    aries.store(L1_C, C, (ti, ))

In [None]:
# Top-level task: launches vadd over `grid` with the full A, B and C arrays and
# returns whatever that launch returns.
# NOTE(review): the local name `gemm_vadd` looks carried over from a GEMM
# example; this notebook implements vector add.
@task_top()
def top(A: float32[I], B: float32[I], C: float32[I]):
    gemm_vadd = vadd[grid](A, B, C)
    return gemm_vadd

In [None]:
# Get the input cells that contain the constants and the decorated tasks.
# IPython stores the source of every executed cell in `In`, indexed by
# execution count (In[0] is an empty placeholder), so this slice picks up
# execution counts 2 through 5. That matches the intended cells only when the
# notebook is run top-to-bottom on a fresh kernel.
cell_codes = get_ipython().user_ns["In"][2:6]
# Join them into one string, with a newline between each cell
all_code = "\n".join(cell_codes)

# Fail fast when the slice points at unrelated cells (e.g. cells were executed
# out of order) instead of handing the wrong source to sch.build later on.
missing_decorators = [
    marker
    for marker in ("@task_kernel", "@task_tile", "@task_top")
    if marker not in all_code
]
if missing_decorators:
    raise RuntimeError(
        "In[2:6] does not contain the expected task definitions (missing: "
        + ", ".join(missing_decorators)
        + "). Restart the kernel and run all cells in order."
    )

In [None]:
# Build the float32 inputs from a fixed seed and a zero-filled output buffer
np.random.seed(0)
A = np.random.rand(I).astype(np.float32)
B = np.random.rand(I).astype(np.float32)
C = np.zeros(I, dtype=np.float32)

# Run the top-level task on the CPU; the returned task object is kept for scheduling
vadd_task = top(A, B, C)

# NumPy reference sum, compared element-wise (within tolerance) against C
D = A + B
print(np.allclose(C, D))

# Emit the files for the on-board test from the inputs and the NumPy reference D
aries.gen_sim([A, B, D])

In [None]:
# Project output directory (under the notebook's working directory) and the
# templates directory under the ARIES path
prj_dir = f"{cur_dir}/project_vadd"
temp_dir = f"{aries_path}/templates"

# Wrap the task returned by top() in a schedule and select "VCK190" as its target
sch = Schedule(vadd_task)
sch.to("VCK190")

# Generate the initial MLIR and ARIES opts from the captured cell source
sch.build(all_code, prj_dir, temp_dir)