diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py
index 6768e03f4473f..8d6ce9a4d8f72 100644
--- a/python/tvm/micro/model_library_format.py
+++ b/python/tvm/micro/model_library_format.py
@@ -27,6 +27,8 @@
 from ..relay.backend import graph_executor_factory
 from ..relay import param_dict
 
+MAIN_FUNC_NAME_STR = "main_func"
+
 
 class UnsupportedInModelLibraryFormatError(Exception):
     """Raised when export_model_library_format does not support the given Module tree."""
@@ -73,8 +75,16 @@ def _populate_codegen_dir(mod, codegen_dir: str):
         dso_mod.save(file_name)
 
 
-def _build_memory_map(graph_json):
-    """Build a simpler memory map from graph JSON.
+def _build_memory_map(graph_json, function_metadata):
+    ret = {
+        "sids": _build_sid_map(graph_json),
+        "functions": _build_function_memory_map(function_metadata),
+    }
+    return ret
+
+
+def _build_sid_map(graph_json):
+    """Build a simpler storage-id map from graph JSON.
 
     Parameters
     ----------
@@ -117,6 +127,82 @@ def _build_memory_map(graph_json):
     return memory_map
 
 
+def _build_function_memory_map(function_metadata):
+    """Build a simple map that shows how much workspace is required to execute
+    each primitive function. The main_func entry describes how much memory is
+    required to execute the main control code.
+
+    Parameters
+    ----------
+    function_metadata : Map<String, FunctionInfo>
+        This contains all the compiled metadata on a function basis
+
+    Returns
+    -------
+    dict :
+        This will have two entries:
+        1.) A list with one entry per function describing the local memory it uses.
+        2.) A global memory requirement if all functions are executed sequentially
+    """
+    device_max_workspace = dict()
+    num_targets = len(function_metadata[MAIN_FUNC_NAME_STR].workspace_sizes.items())
+    func_entries = []
+    target_local_entries = dict()
+    for i in range(num_targets):
+        for func_name, finfo in function_metadata.items():
+            if func_name == MAIN_FUNC_NAME_STR:
+                continue
+            target = finfo.workspace_sizes.items()[i][0]
+            device_max_workspace[target] = 0
+            target_local_entries[func_name] = list()
+
+        for func_name, finfo in function_metadata.items():
+            if func_name == MAIN_FUNC_NAME_STR:
+                continue
+            assert len(finfo.constant_sizes.items()) == num_targets
+            assert len(finfo.io_sizes.items()) == num_targets
+            target = finfo.workspace_sizes.items()[i][0]
+            workspace_size = finfo.workspace_sizes.items()[i][1]
+            target_entry = {
+                "device": int(target.kind.device_type),
+                "workspace_size_bytes": int(workspace_size),
+            }
+            target_local_entries[func_name].append(target_entry)
+            if workspace_size > device_max_workspace[target]:
+                device_max_workspace[target] = workspace_size
+
+    for func_name, target_entries_ in target_local_entries.items():
+        func_entry = {
+            "function_name": str(func_name),
+            "workspace": target_entries_,
+        }
+        func_entries.append(func_entry)
+
+    target_main_entries = list()
+    for i in range(num_targets):
+        target = function_metadata[MAIN_FUNC_NAME_STR].workspace_sizes.items()[i][0]
+        main_func_local_workspace = function_metadata[MAIN_FUNC_NAME_STR].workspace_sizes.items()[
+            i
+        ][1]
+        main_func_constants = function_metadata[MAIN_FUNC_NAME_STR].constant_sizes.items()[i][1]
+        main_func_io = function_metadata[MAIN_FUNC_NAME_STR].io_sizes.items()[i][1]
+        target_main_entries.append(
+            {
+                "device": int(target.kind.device_type),
+                "workspace_size_bytes": int(device_max_workspace[target])
+                + int(main_func_local_workspace),
+                "constants_size_bytes": int(main_func_constants),
+                "io_size_bytes": int(main_func_io),
+            }
+        )
+
+    ret = {
+        "operator_functions": func_entries,
+        "main_function": target_main_entries,
+    }
+    return ret
+
+
 def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactoryModule, file_name):
     """Export the build artifact in Model Library Format.
 
@@ -133,10 +219,10 @@ def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactory
     """
     tempdir = utils.tempdir()
     metadata = {
-        "version": 1,
+        "version": 2,
         "model_name": mod.libmod_name,
         "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"),
-        "memory": _build_memory_map(mod.graph_json),
+        "memory": _build_memory_map(mod.graph_json, mod.function_metadata),
         "target": {int(k): str(v) for k, v in mod.target.items()},
        "runtimes": ["graph"],
     }
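Taken together, these changes replace the flat `memory` list in `metadata.json` with a map holding `sids` and `functions`. Below is a minimal sketch of consuming the new layout, assuming a `metadata.json` already extracted from the exported tarball (the path is hypothetical); the key names follow `_build_memory_map` and `_build_function_memory_map` above.

```python
import json

# Hypothetical path to an extracted Model Library Format v2 metadata file.
with open("extract_dir/metadata.json") as f:
    metadata = json.load(f)

# Worst-case memory per device: workspace + constants + I/O for main_function.
for entry in metadata["memory"]["functions"]["main_function"]:
    total = (
        entry["workspace_size_bytes"]
        + entry["constants_size_bytes"]
        + entry["io_size_bytes"]
    )
    print(f"device {entry['device']}: {total} bytes")
```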
"operator_functions": func_entries, + "main_function": target_main_entries, + } + return ret + + def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactoryModule, file_name): """Export the build artifact in Model Library Format. @@ -133,10 +219,10 @@ def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactory """ tempdir = utils.tempdir() metadata = { - "version": 1, + "version": 2, "model_name": mod.libmod_name, "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"), - "memory": _build_memory_map(mod.graph_json), + "memory": _build_memory_map(mod.graph_json, mod.function_metadata), "target": {int(k): str(v) for k, v in mod.target.items()}, "runtimes": ["graph"], } diff --git a/python/tvm/relay/backend/__init__.py b/python/tvm/relay/backend/__init__.py index 4fc2b63748db5..f4d911a22bfe2 100644 --- a/python/tvm/relay/backend/__init__.py +++ b/python/tvm/relay/backend/__init__.py @@ -16,3 +16,4 @@ # under the License. """Backend codegen modules for relay.""" from . import compile_engine +from . import utils diff --git a/python/tvm/relay/backend/_ffi_api.py b/python/tvm/relay/backend/_ffi_api.py new file mode 100644 index 0000000000000..2d27709aee0ba --- /dev/null +++ b/python/tvm/relay/backend/_ffi_api.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""FFI APIs for tvm.relay.backend""" +import tvm._ffi + + +tvm._ffi._init_api("relay.backend", __name__) diff --git a/python/tvm/relay/backend/graph_executor_factory.py b/python/tvm/relay/backend/graph_executor_factory.py index d6959d22e5c87..f479c0506a1e7 100644 --- a/python/tvm/relay/backend/graph_executor_factory.py +++ b/python/tvm/relay/backend/graph_executor_factory.py @@ -39,9 +39,13 @@ class GraphExecutorFactoryModule: The name of module params : dict of str to NDArray The parameters of module + function_metadata : Map of String to FunctionInfo + This holds a map function names to their information """ - def __init__(self, ir_mod, target, graph_json_str, libmod, libmod_name, params): + def __init__( + self, ir_mod, target, graph_json_str, libmod, libmod_name, params, function_metadata + ): assert isinstance(graph_json_str, string_types) fcreate = get_global_func("tvm.graph_executor_factory.create") args = [] @@ -56,6 +60,7 @@ def __init__(self, ir_mod, target, graph_json_str, libmod, libmod_name, params): self.libmod_name = libmod_name self.params = params self.iter_cnt = 0 + self.function_metadata = function_metadata def export_library(self, file_name, fcompile=None, addons=None, **kwargs): return self.module.export_library(file_name, fcompile, addons, **kwargs) diff --git a/python/tvm/relay/backend/utils.py b/python/tvm/relay/backend/utils.py new file mode 100644 index 0000000000000..f281f9d57ab88 --- /dev/null +++ b/python/tvm/relay/backend/utils.py @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""The utility functions and classes for relay backend compilation""" +from tvm.runtime import Object +from . import _ffi_api + + +class FunctionInfo(Object): + """A data structure to hold metadata of relay primitive functions""" + + def __init__(self, dummy): + self.__init_handle_by_constructor__(_ffi_api.FunctionInfo, dummy) + + def set_workspace_size(self, target, size): + _ffi_api._FunctionInfo_SetWorkspaceSize(self, target, size) diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index ed59ad9bdc8f9..35a52afb8b7bb 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -83,6 +83,7 @@ def __init__(self): self._optimize = self.mod["optimize"] self._set_params_func = self.mod["set_params"] self._get_params_func = self.mod["get_params"] + self._get_function_metadata = self.mod["get_function_metadata"] def build(self, mod, target=None, target_host=None, params=None): """ @@ -189,6 +190,12 @@ def get_module(self): """Return the built module.""" return self._get_module() + def get_function_metadata(self): + """Return the compiled function metadata. 
diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py
index ed59ad9bdc8f9..35a52afb8b7bb 100644
--- a/python/tvm/relay/build_module.py
+++ b/python/tvm/relay/build_module.py
@@ -83,6 +83,7 @@ def __init__(self):
         self._optimize = self.mod["optimize"]
         self._set_params_func = self.mod["set_params"]
         self._get_params_func = self.mod["get_params"]
+        self._get_function_metadata = self.mod["get_function_metadata"]
 
     def build(self, mod, target=None, target_host=None, params=None):
         """
@@ -189,6 +190,12 @@ def get_module(self):
         """Return the built module."""
         return self._get_module()
 
+    def get_function_metadata(self):
+        """Return the compiled function metadata.
+        Currently, the metadata contains the workspace size required by
+        each PrimFunc."""
+        return self._get_function_metadata()
+
     def get_params(self):
         """Return the updated weights."""
         params = self._get_params_func()
@@ -288,8 +295,9 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default"
     with tophub_context:
         bld_mod = BuildModule()
         graph_json, runtime_mod, params = bld_mod.build(mod=ir_mod, target=target, params=params)
+        func_metadata = bld_mod.get_function_metadata()
         executor_factory = _graph_executor_factory.GraphExecutorFactoryModule(
-            ir_mod, target, graph_json, runtime_mod, mod_name, params
+            ir_mod, target, graph_json, runtime_mod, mod_name, params, func_metadata
         )
         return executor_factory
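With these hooks in place, the metadata travels from codegen through `BuildModule` to the factory module. A hedged end-to-end sketch (the synthetic workload is just a stand-in; any Relay module works):

```python
import tvm
from tvm import relay
from tvm.relay.testing import synthetic

# Build any Relay module; the factory now carries function_metadata.
relay_mod, params = synthetic.get_workload()
with tvm.transform.PassContext(opt_level=3):
    factory = relay.build(relay_mod, "llvm", params=params)

# One FunctionInfo per lowered primitive function, plus the "main_func" entry.
for name, finfo in factory.function_metadata.items():
    for target, size in finfo.workspace_sizes.items():
        print(name, str(target), int(size))
```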
+ * \brief Update the "main" control function's metadata + * + * \param func The main function that contains calls to relay primitive functions + */ + void UpdateMainWorkspaceSize(const Function& func) { + std::unordered_map> sid_workspace; + std::unordered_map device_workspace; + uint64_t params_size = 0; + uint64_t input_size = 0; + uint64_t output_size = 0; + + for (const auto& kv : storage_device_map_) { + auto sids = kv.second[0]; + auto devices = kv.second[1]; + CHECK_EQ(sids.size(), devices.size()); + for (uint32_t i = 0; i < sids.size(); i++) { + sid_workspace[devices[i]][sids[i]] = 0; + } + } + + for (const auto& kv : storage_device_map_) { + auto size_bytes = CalculateRelayExprSizeBytes(kv.first->checked_type()); + if (kv.first->IsInstance()) { + params_size += size_bytes; + continue; + } else if (kv.first->IsInstance()) { + input_size += size_bytes; + continue; + } else if (kv.first == func->body) { + output_size += size_bytes; + continue; + } + auto sids = kv.second[0]; + auto devices = kv.second[1]; + for (uint32_t i = 0; i < sids.size(); i++) { + if (size_bytes > sid_workspace[devices[i]][sids[i]]) { + sid_workspace[devices[i]][sids[i]] = size_bytes; + } + } + } + + for (const auto& dev_sid_size : sid_workspace) { + auto dev = dev_sid_size.first; + device_workspace[dev] = 0; + for (const auto& sid_size : dev_sid_size.second) { + device_workspace[dev] += sid_size.second; + } + } + + auto fi_node = make_object(); + for (const auto& dev_and_size : device_workspace) { + auto tgt = GetTargetFromInteger(dev_and_size.first); + fi_node->workspace_sizes.Set(tgt, dev_and_size.second); + } + + fi_node->io_sizes.Set(GetTargetFromInteger(0), input_size + output_size); + fi_node->constant_sizes.Set(GetTargetFromInteger(0), params_size); + fi_node->relay_primfuncs.Set(GetTargetFromInteger(0), func); + + function_metadata_.Set(kMainFuncStr, FunctionInfo(fi_node)); + } + LoweredOutput Codegen(relay::Function func) { auto pf = GetPackedFunc("relay.backend.GraphPlanMemory"); storage_device_map_ = (*pf)(func); + UpdateMainWorkspaceSize(func); // First we convert all the parameters into input nodes. for (auto param : func->params) { auto node_ptr = GraphInputNode::make_node_ptr(param->name_hint(), GraphAttrs()); @@ -219,6 +315,7 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslatorLowerExternalFunctions(); + ret.function_metadata = std::move(function_metadata_); return ret; } @@ -349,6 +446,75 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator(op)); } + /*! + * \brief Obtain the Target from the device type. + * If homogenous compilation, this will return the only target. + * If heteregenous compilation, this will select associated using the targets_ Map. + * + * \param dev_type + * \return Target + */ + Target GetTargetFromInteger(int64_t dev_type) { + if (targets_.size() == 1) { + // homogeneous execution. + const auto& it = targets_.begin(); + return (*it).second; + } else { + // heterogeneous execution. + std::string call_dev_name; + if (dev_type == 0) { + call_dev_name = "llvm"; + } else { + call_dev_name = runtime::DeviceName(dev_type); + } + if (targets_.count(dev_type) == 0) { + LOG(FATAL) << "No target is provided for device " << call_dev_name; + } + return targets_[dev_type]; + } + } + + /*! + * \brief Update the function metadata for a given cached function and its relay + * primitive function. 
@@ -349,6 +446,75 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<GraphNodeRef>>
     return AddNode(node, GetRef<Expr>(op));
   }
 
+  /*!
+   * \brief Obtain the Target from the device type.
+   * If homogeneous compilation, this will return the only target.
+   * If heterogeneous compilation, this will select the associated target using the
+   * targets_ Map.
+   *
+   * \param dev_type The device type
+   * \return Target
+   */
+  Target GetTargetFromInteger(int64_t dev_type) {
+    if (targets_.size() == 1) {
+      // homogeneous execution.
+      const auto& it = targets_.begin();
+      return (*it).second;
+    } else {
+      // heterogeneous execution.
+      std::string call_dev_name;
+      if (dev_type == 0) {
+        call_dev_name = "llvm";
+      } else {
+        call_dev_name = runtime::DeviceName(dev_type);
+      }
+      if (targets_.count(dev_type) == 0) {
+        LOG(FATAL) << "No target is provided for device " << call_dev_name;
+      }
+      return targets_[dev_type];
+    }
+  }
+
+  /*!
+   * \brief Update the function metadata for a given cached function and its relay
+   * primitive function.
+   *
+   * \param cfunc The cached function as provided by the compile engine
+   * \param relay_func The source relay primitive function
+   * \param relay_target The target associated with the relay primitive function
+   */
+  void UpdateFunctionMetadata(const CachedFunc& cfunc, const Function& relay_func,
+                              const Target& relay_target) {
+    auto fi_node = make_object<FunctionInfoNode>();
+    for (const auto& kv : cfunc->funcs->functions) {
+      auto primfunc = Downcast<tir::PrimFunc>(kv.second);
+      Integer workspace_size = CalculateWorkspaceBytes(primfunc);
+      Target primfunc_target = relay_target;
+      if (primfunc->attrs->dict.count("target")) {
+        primfunc_target = Downcast<Target>(primfunc->attrs->dict["target"]);
+      }
+      fi_node->workspace_sizes.Set(primfunc_target, workspace_size);
+      // Calculating size for I/O
+      for (auto const& param : primfunc->params) {
+        auto p_shape = primfunc->buffer_map[param]->shape;
+        int num_of_elements = 1;
+        for (const auto& dim_index_expr : p_shape) {
+          if (dim_index_expr->IsInstance<IntImmNode>()) {
+            num_of_elements *= dim_index_expr.as<IntImmNode>()->value;
+          } else {
+            // If shape is dynamic, we cannot calculate workspace at compile time.
+            num_of_elements = 0;
+          }
+        }
+        int element_size = primfunc->buffer_map[param]->dtype.bytes();
+        fi_node->io_sizes.Set(primfunc_target, element_size * num_of_elements);
+      }
+      fi_node->constant_sizes.Set(primfunc_target, 0);
+      fi_node->tir_primfuncs.Set(primfunc_target, primfunc);
+      fi_node->relay_primfuncs.Set(primfunc_target, relay_func);
+    }
+    function_metadata_.Set(cfunc->func_name, FunctionInfo(fi_node));
+  }
+
   std::vector<GraphNodeRef> VisitExpr_(const CallNode* op) override {
     Expr expr = GetRef<Expr>(op);
     Function func;
@@ -383,30 +549,19 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<GraphNodeRef>>
     ICHECK_GE(storage_device_map_.count(expr), 0);
     auto& device_type = storage_device_map_[expr][1];
     auto call_dev_type = device_type[0]->value;
+    target = GetTargetFromInteger(call_dev_type);
     // Normal Relay Function
-    if (targets_.size() == 1) {
-      // homogeneous execution.
-      const auto& it = targets_.begin();
-      target = (*it).second;
-    } else {
-      // heterogeneous execution.
-      std::string call_dev_name;
-      if (call_dev_type == 0) {
-        call_dev_name = "llvm";
-      } else {
-        call_dev_name = runtime::DeviceName(call_dev_type);
-      }
-      if (targets_.count(call_dev_type) == 0) {
-        LOG(FATAL) << "No target is provided for device " << call_dev_name;
-      }
-      target = targets_[call_dev_type];
-    }
+
     CCacheKey key = (*pf0)(func, target);
     CachedFunc lowered_func = (*pf1)(compile_engine_, key);
     if (!lowered_funcs_.count(target->str())) {
       lowered_funcs_[target->str()] = IRModule(Map<GlobalVar, BaseFunc>({}));
     }
     lowered_funcs_[target->str()]->Update(lowered_func->funcs);
+
+    // Update function metadata by looking at all primfuncs
+    UpdateFunctionMetadata(lowered_func, func, target);
+
     return GraphAddCallNode(op, _GetUniqueName(lowered_func->func_name), lowered_func->func_name);
   }
@@ -551,10 +706,14 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<GraphNodeRef>>
   /*! \brief plan memory of device result */
   Map<Expr, Array<IntegerArray>> storage_device_map_;
   /*! \brief lowered funcs */
   std::unordered_map<std::string, IRModule> lowered_funcs_;
+  /*! \brief function metadata */
+  Map<String, FunctionInfo> function_metadata_;
   /*! \brief name map */
   std::unordered_map<std::string, size_t> name_map_;
   /*! \brief compile engine */
   CompileEngine compile_engine_;
+  /*! \brief main function name */
+  const String kMainFuncStr = "main_func";
 };
 
 class GraphExecutorCodegenModule : public runtime::ModuleNode {
@@ -614,6 +773,10 @@ class GraphExecutorCodegenModule : public runtime::ModuleNode {
       return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
         *rv = this->output_.external_mods;
       });
+    } else if (name == "get_function_metadata") {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        *rv = this->output_.function_metadata;
+      });
     } else {
       return PackedFunc([](TVMArgs args, TVMRetValue* rv) {});
     }
diff --git a/src/relay/backend/utils.cc b/src/relay/backend/utils.cc
new file mode 100644
index 0000000000000..ba865d9d0a5b6
--- /dev/null
+++ b/src/relay/backend/utils.cc
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/utils.cc
+ * \brief Relay backend utilities.
+ */
+
+#include "utils.h"
+
+namespace tvm {
+namespace relay {
+namespace backend {
+
+void FunctionInfo::SetWorkspaceSize(Target tgt, tvm::Integer size) {
+  (*this)->workspace_sizes.Set(tgt, size);
+}
+
+TVM_REGISTER_NODE_TYPE(FunctionInfoNode);
+
+TVM_REGISTER_GLOBAL("relay.backend.FunctionInfo").set_body_typed([]() {
+  // Construct with a live node so the handle is usable from Python.
+  return FunctionInfo(make_object<FunctionInfoNode>());
+});
+
+TVM_REGISTER_GLOBAL("relay.backend._FunctionInfo_SetWorkspaceSize")
+    .set_body_typed([](FunctionInfo fi, Target target, Integer size) {
+      return fi.SetWorkspaceSize(target, size);
+    });
+
+}  // namespace backend
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h
index 6908ca85f5827..ccb1269184080 100644
--- a/src/relay/backend/utils.h
+++ b/src/relay/backend/utils.h
@@ -43,6 +43,31 @@ namespace tvm {
 namespace relay {
 namespace backend {
 
+struct FunctionInfoNode : public Object {
+  Map<Target, Integer> workspace_sizes;
+  Map<Target, Integer> io_sizes;
+  Map<Target, Integer> constant_sizes;
+  Map<Target, tir::PrimFunc> tir_primfuncs;
+  Map<Target, Function> relay_primfuncs;
+
+  void VisitAttrs(tvm::AttrVisitor* v) {
+    v->Visit("workspace_sizes", &workspace_sizes);
+    v->Visit("io_sizes", &io_sizes);
+    v->Visit("constant_sizes", &constant_sizes);
+    v->Visit("tir_primfuncs", &tir_primfuncs);
+    v->Visit("relay_primfuncs", &relay_primfuncs);
+  }
+
+  static constexpr const char* _type_key = "relay.backend.FunctionInfo";
+  TVM_DECLARE_FINAL_OBJECT_INFO(FunctionInfoNode, Object);
+};
+
+class FunctionInfo : public ObjectRef {
+ public:
+  void SetWorkspaceSize(Target tgt, Integer size);
+  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(FunctionInfo, ObjectRef, FunctionInfoNode);
+};
+
 /*!
  * \brief A helper to expand the params by adding the ones used in a given expression.
  */
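Both CalculateRelayExprSizeBytes and the I/O loop in UpdateFunctionMetadata reduce a static shape to element count times dtype width, with any dynamic dimension collapsing the result to zero. A Python mirror of that arithmetic, illustrative only:

```python
def tensor_size_bytes(shape, dtype_bytes):
    """Mirror of the size arithmetic above; shape holds ints or symbolic dims."""
    num_of_elements = 1
    for dim in shape:
        if isinstance(dim, int):
            num_of_elements *= dim
        else:
            # Dynamic shape: the size cannot be computed at compile time.
            num_of_elements = 0
    return num_of_elements * dtype_bytes

assert tensor_size_bytes((1, 56, 56, 128), 2) == 802816  # int16 NHWC tensor
assert tensor_size_bytes((1, "n", 128), 4) == 0  # any dynamic dim -> 0
```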
diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py
index db6c55bca12ae..0a1cb346203db 100644
--- a/tests/python/unittest/test_micro_model_library_format.py
+++ b/tests/python/unittest/test_micro_model_library_format.py
@@ -78,19 +78,35 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
     with open(os.path.join(extract_dir, "metadata.json")) as json_f:
         metadata = json.load(json_f)
-        assert metadata["version"] == 1
+        assert metadata["version"] == 2
         assert metadata["model_name"] == "add"
         export_datetime = datetime.datetime.strptime(
             metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
         )
         assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
         assert metadata["target"] == {"1": str(target)}
-        assert metadata["memory"] == [
+        assert metadata["memory"]["sids"] == [
             {"storage_id": 0, "size_bytes": 2, "input_binding": "a"},
             {"storage_id": 1, "size_bytes": 8, "input_binding": "b"},
             {"storage_id": 2, "size_bytes": 8, "input_binding": "p0"},
             {"storage_id": 3, "size_bytes": 8},
         ]
+        assert metadata["memory"]["functions"] == {
+            "main_function": [
+                {
+                    "constants_size_bytes": 8,
+                    "device": 1,
+                    "io_size_bytes": 18,
+                    "workspace_size_bytes": 0,
+                }
+            ],
+            "operator_functions": [
+                {
+                    "function_name": "fused_cast_multiply_add",
+                    "workspace": [{"device": 1, "workspace_size_bytes": 0}],
+                }
+            ],
+        }
 
     assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib0.c"))
     assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib1.c"))
@@ -141,19 +157,35 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
     with open(os.path.join(extract_dir, "metadata.json")) as json_f:
         metadata = json.load(json_f)
-        assert metadata["version"] == 1
+        assert metadata["version"] == 2
         assert metadata["model_name"] == "add"
         export_datetime = datetime.datetime.strptime(
             metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
         )
         assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
         assert metadata["target"] == {"1": str(target)}
-        assert metadata["memory"] == [
+        assert metadata["memory"]["sids"] == [
             {"storage_id": 0, "size_bytes": 2, "input_binding": "a"},
             {"storage_id": 1, "size_bytes": 8, "input_binding": "b"},
             {"storage_id": 2, "size_bytes": 8, "input_binding": "p0"},
             {"storage_id": 3, "size_bytes": 8},
         ]
+        assert metadata["memory"]["functions"] == {
+            "main_function": [
+                {
+                    "constants_size_bytes": 8,
+                    "device": 1,
+                    "io_size_bytes": 18,
+                    "workspace_size_bytes": 0,
+                }
+            ],
+            "operator_functions": [
+                {
+                    "function_name": "fused_cast_multiply_add_1",
+                    "workspace": [{"device": 1, "workspace_size_bytes": 0}],
+                }
+            ],
+        }
 
     assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "lib", "lib0.o"))
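The expected sizes in the two hunks above follow from the tensor shapes in the test IR; a quick check of the arithmetic:

```python
# io_size_bytes: inputs a (1x2 uint8) and b (1x2 float32) plus the 1x2 float32 output.
assert 1 * 2 * 1 + 1 * 2 * 4 + 1 * 2 * 4 == 18
# constants_size_bytes: the bound parameter p0 (1x2 float32).
assert 1 * 2 * 4 == 8
```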
kernel_layout="HWOI", out_dtype="int32") /* ty=Tensor[(1, 56, 56, 128), int32] */; + %1 = add(%0, %p2) /* ty=Tensor[(1, 56, 56, 128), int32] */; + %2 = fixed_point_multiply(%1, multiplier=2080045879, shift=-4) /* ty=Tensor[(1, 56, 56, 128), int32] */; + %3 = clip(%2, a_min=0f, a_max=255f) /* ty=Tensor[(1, 56, 56, 128), int32] */; + cast(%3, dtype="uint8") /* ty=Tensor[(1, 56, 56, 128), uint8] */ + } + """ + ) + factory = tvm.relay.build(relay_mod, target, target_host=target, mod_name="qnn_conv2d") + + temp_dir = utils.tempdir() + mlf_tar_path = temp_dir.relpath("lib.tar") + import tvm.micro as micro + + micro.export_model_library_format(factory, mlf_tar_path) + tf = tarfile.open(mlf_tar_path) + + extract_dir = temp_dir.relpath("extract") + os.mkdir(extract_dir) + tf.extractall(extract_dir) + + with open(os.path.join(extract_dir, "metadata.json")) as json_f: + metadata = json.load(json_f) + assert metadata["version"] == 2 + assert metadata["model_name"] == "qnn_conv2d" + export_datetime = datetime.datetime.strptime( + metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" + ) + assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) + assert metadata["target"] == {"1": str(target)} + assert metadata["memory"]["functions"] == { + "main_function": [ + { + "constants_size_bytes": 0, + "device": 1, + "io_size_bytes": 1207040, + "workspace_size_bytes": 2466816, + } + ], + "operator_functions": [ + { + "function_name": "fused_nn_conv2d_add_fixed_point_multiply_clip_cast", + "workspace": [{"device": 1, "workspace_size_bytes": 2466816}], + } + ], + } + + @tvm.testing.requires_micro def test_export_model(): module = tvm.support.FrontendTestModule() factory = graph_executor_factory.GraphExecutorFactoryModule( - None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {} + None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {}, {} ) temp_dir = utils.tempdir()