diff --git a/python/tvm/contrib/hexagon/build.py b/python/tvm/contrib/hexagon/build.py index fd74eb7738cf..1664ee4b1184 100644 --- a/python/tvm/contrib/hexagon/build.py +++ b/python/tvm/contrib/hexagon/build.py @@ -182,9 +182,14 @@ def upload(self, local_path: Union[str, pathlib.Path], remote_filename: str): assert self._workspace self._copy_to_remote(local_path, os.path.join(str(self._workspace), remote_filename)) - def start_session(self) -> Session: + def start_session(self, session_name: str = "hexagon-rpc") -> Session: """Connect to the RPC server. + Parameters + ---------- + session_name : str + RPC session name. + Returns ------- Session : @@ -197,7 +202,7 @@ def start_session(self) -> Session: "timeout": 0, "key": self._device_key, } - return Session(self, hexagon_remote_kw) + return Session(self, hexagon_remote_kw, session_name=session_name) def load_module(self, module: Union[str, pathlib.Path, tvm.runtime.Module], session: Session): """Load TVM module. diff --git a/python/tvm/contrib/hexagon/session.py b/python/tvm/contrib/hexagon/session.py index 56bd1b79faad..7d2eecbc2c28 100644 --- a/python/tvm/contrib/hexagon/session.py +++ b/python/tvm/contrib/hexagon/session.py @@ -60,15 +60,16 @@ def __init__( rpc_receive_buffer_size_bytes: int = 2 * 1024 * 1024, ): self._launcher = launcher - self._session_name = session_name - self._remote_stack_size_bytes = remote_stack_size_bytes - self._rpc_receive_buffer_size_bytes = rpc_receive_buffer_size_bytes - self._remote_kw = remote_kw + self._session_name: str = session_name + self._remote_stack_size_bytes: int = remote_stack_size_bytes + self._rpc_receive_buffer_size_bytes: int = rpc_receive_buffer_size_bytes + self._remote_kw: dict = remote_kw self._rpc = None - self.device = None + self._requires_cpu_device = False + self._device = None def __enter__(self): - if self.device: + if self._rpc: # Already initialized return self @@ -86,7 +87,6 @@ def __enter__(self): self._rpc_receive_buffer_size_bytes, ], ) - self.device = self._rpc.hexagon(0) return self except RuntimeError as exception: @@ -95,6 +95,20 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, exc_traceback): pass + @property + def device(self): + """Session device.""" + + if self._device is not None: + return self._device + + if self._requires_cpu_device: + self._device = self._rpc.cpu(0) + else: + self._device = self._rpc.hexagon(0) + + return self._device + def upload(self, local_path: Union[str, pathlib.Path], remote_filename: str): """Upload a local file to the remote workspace. @@ -133,9 +147,7 @@ def load_module(self, module: Union[str, pathlib.Path, tvm.runtime.Module]): TVM module object. """ - assert ( - self.device is not None - ), "Hexagon session must be started using __enter__ prior to use" + assert self._rpc is not None, "Hexagon session must be started using __enter__ prior to use" if isinstance(module, tvm.runtime.Module): with tempfile.TemporaryDirectory() as temp_dir: @@ -179,6 +191,7 @@ def get_graph_executor( """ graph_mod = self.load_module(module_name) + self._set_device_type(graph_mod) return tvm.contrib.graph_executor.create(graph_json, graph_mod, self.device) def get_aot_executor( @@ -206,6 +219,7 @@ def get_aot_executor( """ aot_mod = self.load_module(module_name) + self._set_device_type(aot_mod) return tvm.runtime.executor.AotModule(aot_mod["default"](self.device)) def get_executor_from_factory(self, module: ExecutorFactoryModule): @@ -226,6 +240,28 @@ def get_executor_from_factory(self, module: ExecutorFactoryModule): raise TypeError(f"Unsupported executor type: {type(module)}") + def _set_device_type(self, module: Union[str, pathlib.Path, GraphExecutorFactoryModule]): + """Set session device type(hexagon, cpu) based on target in module. + + Parameters + ---------- + + module: TVMModule + TVM module object. + """ + # for cases when module is a single schedule without target attribute. + if not hasattr(module, "target"): + self._requires_cpu_device = False + else: + assert len(module.target.values()) == 1 + for target in module.target.values(): + target_type = str(target).split()[0] + + if target_type == "llvm": + self._requires_cpu_device = True + else: + self._requires_cpu_device = False + def _graph_executor_from_factory( self, module: Union[str, pathlib.Path, GraphExecutorFactoryModule], @@ -286,6 +322,12 @@ def _aot_executor_from_factory( for target in module.target.values() if "hexagon" in target.keys ) + + self._set_device_type(module) + + for target in module.target.values(): + target_type = str(target).split()[0] + assert hexagon_arch, "No hexagon target architecture found" assert len(hexagon_arch) == 1, f"Inconsistent hexagon architecture found, {hexagon_arch}" hexagon_arch = hexagon_arch.pop() @@ -295,11 +337,22 @@ def _aot_executor_from_factory( binary_name = "test_binary.so" binary_path = temp_dir / binary_name - module.export_library( - str(binary_path), - fcompile=hexagon.create_aot_shared, - hexagon_arch=hexagon_arch, - ) + if target_type == "hexagon": + module.export_library( + str(binary_path), + fcompile=hexagon.create_aot_shared, + hexagon_arch=hexagon_arch, + ) + elif target_type == "llvm": + module.export_library( + str(binary_path), + cc=hexagon.hexagon_clang_plus(), + ) + else: + raise ValueError( + f"Incorrect Target kind.\n" + f"Target kind should be from these options: [hexagon, llvm]." + ) self.upload(binary_path, binary_name) diff --git a/python/tvm/script/tir/__init__.pyi b/python/tvm/script/tir/__init__.pyi index 3eb383ed9974..9727a8db6316 100644 --- a/python/tvm/script/tir/__init__.pyi +++ b/python/tvm/script/tir/__init__.pyi @@ -226,6 +226,7 @@ def alloc_buffer( """ special_stmt - Reads/Writes """ + @overload def reads(read_regions: List[BufferSlice]) -> None: ... @overload @@ -337,6 +338,7 @@ def Assert(condition: Union[PrimExpr, builtins.bool], message: str) -> PrimExpr: """ Scope handler - Loops """ + @overload def serial( begin: Union[PrimExpr, int], diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index c2b2ac0fc5e2..9a194965ded4 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -1234,12 +1234,12 @@ class AOTExecutorCodegenModule : public runtime::ModuleNode { Target target_host; for (const auto& it : tmp) { auto dev_type = it.first.as(); - if (!target_host.defined() && it.second->kind->device_type == kDLCPU) { + // TODO(tvm-team): AoT only works with kDLCPU device type. We can remove kDLHexagon + // here once we refactored kDLHexagon to kDLCPU. + if (!target_host.defined() && ((it.second->kind->device_type == kDLCPU) || + (it.second->kind->device_type == kDLHexagon))) { target_host = it.second; } - if (!target_host.defined() && it.second->kind->device_type == kDLHexagon) { - target_host = *(new Target("c")); - } ICHECK(dev_type); targets[static_cast(dev_type->value)] = it.second; } diff --git a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc b/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc index 5a7642abeb55..ebd826b2c7b3 100644 --- a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc +++ b/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc @@ -84,6 +84,7 @@ void* HexagonDeviceAPIv2::AllocDataSpace(Device dev, int ndim, const int64_t* sh void* HexagonDeviceAPIv2::AllocDataSpace(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) { + // Added kDLCPU since we use hexagon as a sub-target of LLVM which by default maps to kDLCPU; bool is_valid_device = (TVMDeviceExtType(dev.device_type) == kDLHexagon) || (DLDeviceType(dev.device_type) == kDLCPU); CHECK(is_valid_device) << "dev.device_type: " << dev.device_type; @@ -94,6 +95,7 @@ void* HexagonDeviceAPIv2::AllocDataSpace(Device dev, size_t nbytes, size_t align } void HexagonDeviceAPIv2::FreeDataSpace(Device dev, void* ptr) { + // Added kDLCPU since we use hexagon as a sub-target of LLVM which by default maps to kDLCPU; bool is_valid_device = (TVMDeviceExtType(dev.device_type) == kDLHexagon) || (DLDeviceType(dev.device_type) == kDLCPU); CHECK(is_valid_device) << "dev.device_type: " << dev.device_type; @@ -107,12 +109,18 @@ struct HexagonWorkspacePool : public WorkspacePool { }; void* HexagonDeviceAPIv2::AllocWorkspace(Device dev, size_t size, DLDataType type_hint) { - CHECK(TVMDeviceExtType(dev.device_type) == kDLHexagon) << "dev.device_type: " << dev.device_type; + // Added kDLCPU since we use hexagon as a sub-target of LLVM which by default maps to kDLCPU; + bool is_valid_device = (TVMDeviceExtType(dev.device_type) == kDLHexagon) || + (DLDeviceType(dev.device_type) == kDLCPU); + CHECK(is_valid_device) << "dev.device_type: " << dev.device_type; return dmlc::ThreadLocalStore::Get()->AllocWorkspace(dev, size); } void HexagonDeviceAPIv2::FreeWorkspace(Device dev, void* data) { - CHECK(TVMDeviceExtType(dev.device_type) == kDLHexagon) << "dev.device_type: " << dev.device_type; + // Added kDLCPU since we use hexagon as a sub-target of LLVM which by default maps to kDLCPU; + bool is_valid_device = (TVMDeviceExtType(dev.device_type) == kDLHexagon) || + (DLDeviceType(dev.device_type) == kDLCPU); + CHECK(is_valid_device) << "dev.device_type: " << dev.device_type; CHECK(hexagon_buffer_map_.count(data) != 0) << "Attempt made to free unknown or already freed workspace allocation"; dmlc::ThreadLocalStore::Get()->FreeWorkspace(dev, data); diff --git a/src/runtime/hexagon/rpc/hexagon/rpc_server.cc b/src/runtime/hexagon/rpc/hexagon/rpc_server.cc index af91dd3b4e6d..f352eb7e0828 100644 --- a/src/runtime/hexagon/rpc/hexagon/rpc_server.cc +++ b/src/runtime/hexagon/rpc/hexagon/rpc_server.cc @@ -61,8 +61,7 @@ class HexagonIOHandler { void MessageStart(size_t message_size_bytes) {} ssize_t PosixWrite(const uint8_t* buf, size_t write_len_bytes) { - LOG(INFO) << "INFO: HexagonIOHandler PosixWrite called, write_len_bytes(" << write_len_bytes - << ")"; + LOG(INFO) << "HexagonIOHandler PosixWrite called, write_len_bytes(" << write_len_bytes << ")"; int32_t written_size = write_buffer_.sputn(reinterpret_cast(buf), write_len_bytes); if (written_size != write_len_bytes) { LOG(ERROR) << "written_size(" << written_size << ") != write_len_bytes(" << write_len_bytes @@ -72,10 +71,10 @@ class HexagonIOHandler { return (ssize_t)written_size; } - void MessageDone() { LOG(INFO) << "INFO: Message Done."; } + void MessageDone() { LOG(INFO) << "Message Done."; } ssize_t PosixRead(uint8_t* buf, size_t read_len_bytes) { - LOG(INFO) << "INFO: HexagonIOHandler PosixRead called, read_len_bytes(" << read_len_bytes + LOG(INFO) << "HexagonIOHandler PosixRead called, read_len_bytes(" << read_len_bytes << "), read_buffer_index_(" << read_buffer_index_ << ")"; uint32_t bytes_to_read = 0; @@ -99,7 +98,7 @@ class HexagonIOHandler { * \return The status */ AEEResult SetReadBuffer(const uint8_t* data, size_t data_size_bytes) { - LOG(INFO) << "INFO: HexagonIOHandler SetReadBuffer: data_size_bytes(" << data_size_bytes + LOG(INFO) << "HexagonIOHandler SetReadBuffer: data_size_bytes(" << data_size_bytes << "), read_buffer_index_(" << read_buffer_index_ << "), read_buffer_size_bytes_(" << read_buffer_size_bytes_ << ")"; if (data_size_bytes > read_buffer_size_bytes_) { @@ -121,7 +120,7 @@ class HexagonIOHandler { * \return The size of data that is read in bytes. */ int64_t ReadFromWriteBuffer(uint8_t* buf, size_t read_size_bytes) { - LOG(INFO) << "INFO: HexagonIOHandler ReadFromWriteBuffer called, read_size_bytes: " + LOG(INFO) << "HexagonIOHandler ReadFromWriteBuffer called, read_size_bytes: " << read_size_bytes; int64_t size = (int64_t)write_buffer_.sgetn(reinterpret_cast(buf), read_size_bytes); write_buffer_available_length_ -= size; @@ -133,7 +132,7 @@ class HexagonIOHandler { return size; } - void Close() { LOG(INFO) << "INFO: HexagonIOHandler Close called"; } + void Close() { LOG(INFO) << "HexagonIOHandler Close called"; } void Exit(int code) { exit(code); } @@ -156,13 +155,20 @@ class HexagonRPCServer { * \param data The data pointer * \param data_size_bytes The data size in bytes. * - * \return The size of data written to IOHandler. + * \return The size of data written to IOHandler if no error. + * Otherwise, returns -1; */ int64_t Write(const uint8_t* data, size_t data_size_bytes) { - if (io_.SetReadBuffer(data, data_size_bytes) != AEE_SUCCESS) { + AEEResult rc = io_.SetReadBuffer(data, data_size_bytes); + if (rc != AEE_SUCCESS) { + LOG(ERROR) << "ERROR: SetReadBuffer failed: " << rc; + return -1; + } + + if (!rpc_server_.ProcessOnePacket()) { + LOG(ERROR) << "ERROR: ProcessOnePacket failed"; return -1; } - rpc_server_.ProcessOnePacket(); return (int64_t)data_size_bytes; } @@ -211,6 +217,8 @@ const tvm::runtime::PackedFunc get_runtime_func(const std::string& name) { void reset_device_api() { const tvm::runtime::PackedFunc api = get_runtime_func("device_api.hexagon.v2"); tvm::runtime::Registry::Register("device_api.hexagon", true).set_body(api); + // Registering device_api.cpu as device_api.hexagon.v2 since we use hexagon as sub-target of LLVM. + tvm::runtime::Registry::Register("device_api.cpu", true).set_body(api); } int __QAIC_HEADER(hexagon_rpc_open)(const char* uri, remote_handle64* handle) { diff --git a/src/runtime/hexagon/rpc/simulator/rpc_server.cc b/src/runtime/hexagon/rpc/simulator/rpc_server.cc index 76f168cd20ad..dee324ec1cb4 100644 --- a/src/runtime/hexagon/rpc/simulator/rpc_server.cc +++ b/src/runtime/hexagon/rpc/simulator/rpc_server.cc @@ -292,6 +292,7 @@ int main() { const auto* api_v2 = tvm::runtime::Registry::Get("device_api.hexagon.v2"); ICHECK(api_v2 != nullptr); tvm::runtime::Registry::Register("device_api.hexagon", true).set_body(*api_v2); + tvm::runtime::Registry::Register("device_api.cpu", true).set_body(*api_v2); tvm::runtime::hexagon::SimulatorRPCServer server; diff --git a/src/runtime/hexagon/rpc/simulator/session.cc b/src/runtime/hexagon/rpc/simulator/session.cc index b0f71c7bcf8a..2c1f4003f1c1 100644 --- a/src/runtime/hexagon/rpc/simulator/session.cc +++ b/src/runtime/hexagon/rpc/simulator/session.cc @@ -214,6 +214,11 @@ class SimulatorRPCChannel final : public RPCChannel { std::string runmain; // Path to run_main_on_hexagon. }; + struct Message_ { + Message msg; + std::string str() const; + }; + Message SendMsg(Message msg); Message SendMsg(uint32_t code, uint32_t len, uint32_t va); void ReadFromProcess(void* host_dst, HEX_VA_t src, size_t len); @@ -461,6 +466,27 @@ std::string SimulatorRPCChannel::Cpu_::str() const { return default_cpu_; } +std::string SimulatorRPCChannel::Message_::str() const { + switch (msg.code) { + case Message::kNone: + return "kNone"; + case Message::kAck: + return "kAck"; + case Message::kTerminate: + return "kTerminate"; + case Message::kReceiveStart: + return "kReceiveStart"; + case Message::kReceiveEnd: + return "kReceiveEnd"; + case Message::kSendStart: + return "kSendStart"; + case Message::kSendEnd: + return "kSendEnd"; + default: + break; + } +} + SimulatorRPCChannel::SDKInfo_::SDKInfo_(const std::string& sdk_root, const std::string& cpu) : root(sdk_root) { // For v69 chips, still look for v68 in the directory names. @@ -524,6 +550,7 @@ SimulatorRPCChannel::SimulatorRPCChannel(int stack_size, std::string args) { const auto* api_v2 = tvm::runtime::Registry::Get("device_api.hexagon.v2"); ICHECK(api_v2 != nullptr); tvm::runtime::Registry::Register("device_api.hexagon", true).set_body(*api_v2); + tvm::runtime::Registry::Register("device_api.cpu", true).set_body(*api_v2); const char* sdk_root_env = std::getenv("HEXAGON_SDK_ROOT"); ICHECK(sdk_root_env != nullptr) << "Please set HEXAGON_SDK_ROOT"; @@ -651,9 +678,14 @@ Message SimulatorRPCChannel::SendMsg(Message msg) { HEX_4u_t result; core = sim_->Run(&result); - ICHECK_EQ(core, HEX_CORE_BREAKPOINT); + Core_ core_ = {core}; + ICHECK_EQ(core, HEX_CORE_BREAKPOINT) + << "Expecting HEX_CORE_BREAKPOINT, received: " << core_.str(); }; + Message_ msg_ = {msg}; + LOG(INFO) << "Sending message: " << msg_.str(); + WriteToProcess(message_buffer_v_, &msg, sizeof msg); run(); diff --git a/src/runtime/library_module.cc b/src/runtime/library_module.cc index 7efa91d912eb..54fd362387c5 100644 --- a/src/runtime/library_module.cc +++ b/src/runtime/library_module.cc @@ -115,8 +115,8 @@ Module LoadModuleFromBinary(const std::string& type_key, dmlc::Stream* stream) { loaders += name.substr(loadkey.size()); } } - LOG(FATAL) << "Binary was created using " << type_key - << " but a loader of that name is not registered. Available loaders are " << loaders + LOG(FATAL) << "Binary was created using {" << type_key + << "} but a loader of that name is not registered. Available loaders are " << loaders << ". Perhaps you need to recompile with this runtime enabled."; } diff --git a/src/target/llvm/codegen_hexagon.cc b/src/target/llvm/codegen_hexagon.cc index 9f7ee6194117..035f772f8d6c 100644 --- a/src/target/llvm/codegen_hexagon.cc +++ b/src/target/llvm/codegen_hexagon.cc @@ -475,6 +475,12 @@ runtime::Module BuildHexagon(IRModule mod, Target target) { TVM_REGISTER_GLOBAL("target.build.hexagon").set_body_typed(BuildHexagon); +TVM_REGISTER_GLOBAL("tvm.codegen.llvm.target_hexagon") + .set_body([](const TVMArgs& targs, TVMRetValue* rv) { + CodeGenLLVM* cg = new CodeGenHexagon(); + *rv = static_cast(cg); + }); + } // namespace codegen } // namespace tvm diff --git a/tests/python/contrib/test_hexagon/conftest.py b/tests/python/contrib/test_hexagon/conftest.py index 009150b1081c..7a90317d5506 100644 --- a/tests/python/contrib/test_hexagon/conftest.py +++ b/tests/python/contrib/test_hexagon/conftest.py @@ -202,3 +202,19 @@ def terminate_rpc_servers(): yield [] if serial == "simulator": os.system("ps ax | grep tvm_rpc_x86 | awk '{print $1}' | xargs kill") + + +aot_host_target = tvm.testing.parameter( + "c", + "llvm -keys=hexagon -link-params=0 -mattr=+hvxv68,+hvx-length128b,+hvx-qfloat,-hvx-ieee-fp -mcpu=hexagonv68 -mtriple=hexagon", +) + + +@tvm.testing.fixture +def aot_target(aot_host_target): + if aot_host_target == "c": + yield tvm.target.hexagon("v68") + elif aot_host_target.startswith("llvm"): + yield aot_host_target + else: + assert False, "Incorrect AoT host target: {aot_host_target}. Options are [c, llvm]." diff --git a/tests/python/contrib/test_hexagon/test_launcher.py b/tests/python/contrib/test_hexagon/test_launcher.py index c2152cf62355..48b3dac2a2c9 100644 --- a/tests/python/contrib/test_hexagon/test_launcher.py +++ b/tests/python/contrib/test_hexagon/test_launcher.py @@ -16,18 +16,14 @@ # under the License. import os -import pathlib import sys import pytest import numpy as np -import logging import tvm.testing from tvm import te from tvm import relay from tvm.relay.backend import Executor, Runtime -from tvm.contrib import utils, ndk -from tvm.contrib.hexagon.build import HexagonLauncher import tvm.contrib.hexagon as hexagon from .conftest import requires_hexagon_toolchain @@ -72,6 +68,7 @@ def test_add_vtcm(hexagon_session): ) mod = hexagon_session.load_module(func) + A_data = tvm.nd.empty(A.shape, A.dtype, hexagon_session.device, "global.vtcm") A_data.copyfrom(np.array([2, 3])) @@ -271,7 +268,7 @@ def _workaround_create_aot_shared(): @requires_hexagon_toolchain -def test_aot_executor(hexagon_session): +def test_aot_executor(hexagon_session, aot_host_target, aot_target): dtype = "float32" input_shape = (1, 128, 128, 3) w_shape = (5, 5, 3, 8) @@ -290,8 +287,6 @@ def test_aot_executor(hexagon_session): relay_mod = tvm.IRModule.from_expr(f) relay_mod = relay.transform.InferType()(relay_mod) - target_hexagon = tvm.target.hexagon("v68") - weight_data = np.random.rand(w_shape[0], w_shape[1], w_shape[2], w_shape[3]).astype(dtype=dtype) input_data = np.random.rand( input_shape[0], input_shape[1], input_shape[2], input_shape[3] @@ -304,7 +299,7 @@ def test_aot_executor(hexagon_session): lowered = tvm.relay.build( relay_mod, params=params, - target=tvm.target.Target(target_hexagon, host="c"), + target=tvm.target.Target(aot_target, host=aot_host_target), runtime=Runtime("cpp"), executor=Executor("aot", {"unpacked-api": False, "interface-api": "packed"}), ) @@ -332,7 +327,7 @@ def test_aot_executor(hexagon_session): @requires_hexagon_toolchain -def test_aot_executor_multiple_conv2d(hexagon_session): +def test_aot_executor_multiple_conv2d(hexagon_session, aot_host_target, aot_target): dtype = "float32" input_shape = (1, 8, 8, 3) w1_shape = (5, 5, 3, 1) @@ -362,8 +357,6 @@ def test_aot_executor_multiple_conv2d(hexagon_session): relay_mod = tvm.IRModule.from_expr(f) relay_mod = relay.transform.InferType()(relay_mod) - target_hexagon = tvm.target.hexagon("v68") - weight1_data = np.random.rand(w1_shape[0], w1_shape[1], w1_shape[2], w1_shape[3]).astype( dtype=dtype ) @@ -381,7 +374,7 @@ def test_aot_executor_multiple_conv2d(hexagon_session): lowered = tvm.relay.build( relay_mod, params=params, - target=tvm.target.Target(target_hexagon, host="c"), + target=tvm.target.Target(aot_target, host=aot_host_target), runtime=Runtime("cpp"), executor=Executor("aot", {"unpacked-api": False, "interface-api": "packed"}), )