55import ml_dtypes
66import pyxrt
77import ctypes
8+ import time
89from . import compilation as comp
910from .base import AIEOperatorBase , MLIROperator
1011from .utils import XRTSubBuffer
@@ -42,8 +43,7 @@ def get_kernel_artifacts(self):
4243 """Collect all kernel artifacts from child operators.
4344
4445 Returns:
45- List of KernelObjectArtifact instances from all unique child operators,
46- with filenames and symbol prefixes disambiguated per operator index.
46+ List of KernelObjectArtifact instances from all unique child operators.
4747 """
4848 kernel_artifacts = []
4949 seen : dict [int , object ] = {}
@@ -52,9 +52,6 @@ def get_kernel_artifacts(self):
5252 ]
5353 for idx , op in enumerate (unique_operators ):
5454 objs = op .get_kernel_artifacts ()
55- for obj in objs :
56- obj .filename = f"op{ idx } _{ obj .filename } "
57- obj .prefix_symbols = f"op{ idx } _"
5855 kernel_artifacts .extend (objs )
5956 return kernel_artifacts
6057
@@ -82,8 +79,6 @@ def get_mlir_artifact(self):
8279 ]
8380 for idx , op in enumerate (unique_operators ):
8481 mlir_artifact = op .get_mlir_artifact ()
85- if len (op .get_kernel_artifacts ()) > 0 :
86- mlir_artifact .generator .kwargs ["func_prefix" ] = f"op{ idx } _"
8782 op_name = f"op{ idx } _{ op .__class__ .__name__ } "
8883 op_names [id (op )] = op_name
8984 operator_mlir_map [op_name ] = mlir_artifact
@@ -290,8 +285,10 @@ def __call__(self, *args):
290285 for i , arg in enumerate (args ):
291286 assert isinstance (arg , pyxrt .bo ), f"Argument { i } is not a pyxrt.bo"
292287 run .set_arg (i , arg )
288+ t0 = time .perf_counter ()
293289 run .start ()
294290 ret_code = run .wait ()
291+ self .last_elapsed = time .perf_counter () - t0
295292 if ret_code != pyxrt .ert_cmd_state .ERT_CMD_STATE_COMPLETED :
296293 raise RuntimeError (f"Kernel execution failed with return code { ret_code } " )
297294
@@ -371,10 +368,10 @@ def get_buffer(self, buffer_name):
371368 return sub_buffer
372369
373370 def __call__ (self ):
374- self .input_buffer .to ( "npu" )
371+ self .input_buffer ._sync_to_device ( )
375372 super ().__call__ (
376373 self .input_buffer .buffer_object (),
377374 self .output_buffer .buffer_object (),
378375 self .scratch_buffer .buffer_object (),
379376 )
380- self .output_buffer .to ( "cpu" )
377+ self .output_buffer ._sync_from_device ( )
0 commit comments