# Imports

In [None]:
import numpy as np
import sys

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.extras.context import mlir_mod_ctx
from aie.helpers.dialects.ext.func import func
from aie.helpers.dialects.ext.scf import _for as range_

from aie.utils.xrt import setup_aie, execute as execute_on_aie
import aie.utils.test as test_utils

# Design Configurations

In [None]:
# Number of uint8 elements in the test vector. It is passed to passthroughKernel
# (which rejects values that are not a multiple of 64 or are below 512) and is
# also used as the host-side input/output buffer size in the test cell.
VECTOR_SIZE = 4096

# Passthrough PyKernel Design

In [None]:
def passthroughKernel(vector_size: int):
    """Describe the passthrough design for a vector of ``vector_size`` uint8 values.

    The body declares, on an ``AIEDevice.npu1_1col`` device:

    * a line-copy function (``passThroughLine``),
    * a shim tile at (0, 0) and a compute tile at (0, 2),
    * two object fifos, ``"in"`` (shim -> compute) and ``"out"``
      (compute -> shim), each carrying one line of ``vector_size // 4`` bytes,
    * a core body that copies one line from ``"in"`` to ``"out"`` per iteration,
    * a runtime sequence that moves ``vector_size`` elements in and out.

    Nothing is returned. The notebook calls this inside ``mlir_mod_ctx()`` and
    then reads the result through ``ctx.module``.

    Args:
        vector_size: Total number of uint8 elements to pass through. Must be a
            multiple of 64 and at least 512.

    Raises:
        ValueError: If ``vector_size`` is not a multiple of 64 or is below 512.
    """
    if vector_size % 64 != 0 or vector_size < 512:
        raise ValueError(
            "Vector size must be a multiple of 64 and greater than or equal to 512"
        )
    N = vector_size
    lineWidthInBytes = N // 4  # chop input in 4 sub-tensors

    @device(AIEDevice.npu1_1col)
    def device_body():
        # define types
        # One "line": a 1-D uint8 array holding a quarter of the full vector.
        line_ty = np.ndarray[(lineWidthInBytes,), np.dtype[np.uint8]]

        # AIE Core Python Function declarations
        # Element-by-element copy of `lineWidth` entries from `input` to `output`.
        @func(emit=True)
        def passThroughLine(input: line_ty, output: line_ty, lineWidth: np.int32):
            for i in range_(lineWidth):
                output[i] = input[i]

        # Tile declarations
        ShimTile = tile(0, 0)
        ComputeTile2 = tile(0, 2)

        # AIE-array data movement with object fifos
        # "in" flows shim -> compute and "out" flows compute -> shim; both carry
        # line_ty objects. The literal 2 is presumably the fifo depth
        # (double buffering) -- TODO confirm against the object_fifo API.
        of_in = object_fifo("in", ShimTile, ComputeTile2, 2, line_ty)
        of_out = object_fifo("out", ComputeTile2, ShimTile, 2, line_ty)

        # Set up compute tiles

        # Compute tile 2
        @core(ComputeTile2)
        def core_body():
            # sys.maxsize iterations: effectively "run forever".
            for _ in range_(sys.maxsize):
                # Acquire one output slot and one input line, copy the line,
                # then release both so the fifos can advance.
                elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
                elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
                passThroughLine(elemIn, elemOut, lineWidthInBytes)
                of_in.release(ObjectFifoPort.Consume, 1)
                of_out.release(ObjectFifoPort.Produce, 1)

        #    print(ctx.module.operation.verify())

        # Type of the full host-side vector (all N uint8 elements).
        vector_ty = np.ndarray[(N,), np.dtype[np.uint8]]

        # Runtime sequence over three vector_ty arguments; the third one is
        # accepted but never referenced in the body.
        @runtime_sequence(vector_ty, vector_ty, vector_ty)
        def sequence(inTensor, outTensor, notUsed):
            # Transfer associated with the "in" fifo, sourced from inTensor.
            # sizes=[1, 1, 1, N] presumably describes one linear run of N
            # elements -- TODO confirm against npu_dma_memcpy_nd docs.
            npu_dma_memcpy_nd(
                metadata=of_in,
                bd_id=0,
                mem=inTensor,
                sizes=[1, 1, 1, N],
                issue_token=True,
            )
            # Transfer associated with the "out" fifo, targeting outTensor.
            # NOTE(review): unlike the input transfer, issue_token is not set
            # here; confirm the default still lets dma_wait observe completion.
            npu_dma_memcpy_nd(
                metadata=of_out,
                bd_id=1,
                mem=outTensor,
                sizes=[1, 1, 1, N],
            )
            # Presumably blocks until the transfers on both fifos have finished.
            dma_wait(of_in, of_out)

# Generate MLIR, print, and save to file

In [None]:
with mlir_mod_ctx() as ctx:
    # Build the design; the resulting module is read back via ctx.module below.
    passthroughKernel(VECTOR_SIZE)
    # Warning: Do not change file name without changing compilation commands below
    with open("notebook_aie.mlir", "w") as mlir_file:
        # Render the module to text once, then echo it to the notebook output
        # and write the same text to the file consumed by the build cell.
        mlir_text = str(ctx.module)
        print(mlir_text)
        print(mlir_text, file=mlir_file)

# Compile the design using `aiecc`

In [None]:
!mkdir notebook_build
!cd notebook_build && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --no-xchesscc --no-xbridge --xclbin-name=notebook.xclbin --npu-insts-name=notebook_insts.txt ../notebook_aie.mlir

# Test the Design using pyxrt

In [None]:
# Element type used for both the host input and the device output buffers.
dtype = np.uint8

# Build the application handle from the artifacts written by the build cell.
# The (size, dtype) groups presumably describe, in order, the first input, an
# unused second input (None, None) and the output buffer -- TODO confirm
# against the setup_aie signature.
app = setup_aie(
    "notebook_build/notebook.xclbin",
    "notebook_build/notebook_insts.txt",
    VECTOR_SIZE,
    dtype,
    None,
    None,
    VECTOR_SIZE,
    dtype,
)

# Ramp 1..VECTOR_SIZE stored as uint8 test data.
input = np.arange(1, VECTOR_SIZE + 1, dtype=dtype)
print("Running...")
aie_output = execute_on_aie(app, input)

# Compare the device output with the input element by element; every
# position where they differ counts as one error.
print("Verifying results ...")
e = np.equal(input, aie_output)
errors = np.count_nonzero(np.logical_not(e))

if errors:
    print("\nError count: ", errors)
    print("\nFailed.\n")
else:
    print("\nPASSED!\n")