Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[microNPU] enable USMP #10022

Merged
merged 6 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/microtvm/zephyr_cmsisnn/src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ extern float output_storage[12];

extern const size_t output_len;

static uint8_t g_crt_workspace[TVMGEN_DEFAULT_WORKSPACE_SIZE + 256];
static uint8_t g_crt_workspace[TVMGEN_DEFAULT_WORKSPACE_SIZE + 512];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did I add this? This looks like a hack to increase the workspace size for over-allocation, if we still need this is something broken in USMP?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea -- I was puzzled by this too.

In fact, USMP is not enabled for cmsis-nn (yet -- it's coming in the next PR) and this is only needed for Zephyr -- other cmsis-nn tests are fine. I am not familiar with Zephyr-related impacts here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i.e. when USMP is enabled we don't need this workspace at all.

Copy link
Contributor Author

@manupak manupak Feb 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was surfaced when I removed this:

# TODO(Mousius) - Remove this massive hack when Targets are unified
if target.kind.name in external_codegens:
device_max_workspace[main_target] += int(workspace_size)
.

Which seems to be adding the workspace again for external_codegens.

I think we need to investigate this further.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Zephyr should have no real impact on the memory pre-allocated here as it's just a block in flash, this is deeply worrying as the allocator is configured here:

StackMemoryManager_Init(&app_workspace, g_crt_workspace, TVMGEN_DEFAULT_WORKSPACE_SIZE);

Thus the allocator itself should never go over if it's performing properly, something is very weird here but I agree we should investigate further when we've integrated USMP fully.

tvm_workspace_t app_workspace;

void TVMLogf(const char* msg, ...) {
Expand Down
13 changes: 7 additions & 6 deletions include/tvm/tir/usmp/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,24 +185,24 @@ struct AllocatedPoolInfoNode : public Object {
PoolInfo pool_info;
/*! \brief The allocated size into this pool */
Integer allocated_size;
/*! \brief An optional associated pool Var*/
Optional<Var> pool_var;
/*! \brief An optional associated pool Var index of PrimFunc params*/
Optional<Integer> pool_var_idx;

void VisitAttrs(tvm::AttrVisitor* v) {
v->Visit("pool_info", &pool_info);
v->Visit("allocated_size", &allocated_size);
v->Visit("pool_var", &pool_var);
v->Visit("pool_var_idx", &pool_var_idx);
}

bool SEqualReduce(const AllocatedPoolInfoNode* other, SEqualReducer equal) const {
return equal(pool_info, other->pool_info) && equal(allocated_size, other->allocated_size) &&
equal(pool_var, other->pool_var);
equal(pool_var_idx, other->pool_var_idx);
}

void SHashReduce(SHashReducer hash_reduce) const {
hash_reduce(pool_info);
hash_reduce(allocated_size);
hash_reduce(pool_var);
hash_reduce(pool_var_idx);
}

static constexpr const char* _type_key = "tir.usmp.AllocatedPoolInfo";
Expand All @@ -211,7 +211,8 @@ struct AllocatedPoolInfoNode : public Object {

class AllocatedPoolInfo : public ObjectRef {
public:
TVM_DLL AllocatedPoolInfo(PoolInfo pool_info, Integer allocated_size, Var pool_var = Var());
TVM_DLL AllocatedPoolInfo(PoolInfo pool_info, Integer allocated_size,
Integer pool_var_idx = Integer());
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(AllocatedPoolInfo, ObjectRef, AllocatedPoolInfoNode);
};

Expand Down
91 changes: 48 additions & 43 deletions python/tvm/micro/model_library_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,42 +181,26 @@ def _build_function_memory_map(function_metadata):
"""
device_max_workspace = dict()
main_func_metadata = function_metadata[MAIN_FUNC_NAME_STR]
num_targets = len(main_func_metadata.workspace_sizes.items())
from tvm.driver import tvmc # pylint: disable=import-outside-toplevel

external_codegens = tvmc.composite_target.get_codegen_names()
func_entries = []
target_local_entries = dict()
for i in range(num_targets):
main_target = main_func_metadata.workspace_sizes.items()[i][0]
device_max_workspace[main_target] = 0
for func_name, finfo in function_metadata.items():
if func_name == MAIN_FUNC_NAME_STR:
continue
target_local_entries[func_name] = list()

for func_name, finfo in function_metadata.items():
# Skip a few unsupported cases:
# 1. The main function metadata is exported elsewhere.
# 2. BYOC operator implementations do not currently export useful FunctionInfo.
if func_name == MAIN_FUNC_NAME_STR or not finfo.tir_primfuncs:
continue
assert (
len(finfo.constant_sizes.items()) == num_targets
), f"{func_name}: found {finfo.constant_sizes!r} vs {num_targets}"
assert len(finfo.io_sizes.items()) == num_targets
target = finfo.workspace_sizes.items()[i][0]
workspace_size = finfo.workspace_sizes.items()[i][1]
for func_name, finfo in function_metadata.items():
# Skip a few unsupported cases:
# 1. The main function metadata is exported elsewhere.
# 2. BYOC operator implementations do not currently export useful FunctionInfo.
if func_name == MAIN_FUNC_NAME_STR or not finfo.tir_primfuncs:
continue
if func_name not in target_local_entries.keys():
target_local_entries[func_name] = list()
for target in dict(finfo.workspace_sizes).keys():
workspace_size = finfo.workspace_sizes[target]
target_entry = {
"device": int(target.kind.device_type),
"workspace_size_bytes": int(workspace_size),
}
target_local_entries[func_name].append(target_entry)
if workspace_size > device_max_workspace.get(target, 0):
device_max_workspace[target] = workspace_size
# TODO(Mousius) - Remove this massive hack when Targets are unified
if target.kind.name in external_codegens:
device_max_workspace[main_target] += int(workspace_size)
if workspace_size >= device_max_workspace.get(int(target.kind.device_type), 0):
device_max_workspace[int(target.kind.device_type)] = workspace_size

for func_name, target_entries_ in target_local_entries.items():
func_entry = {
Expand All @@ -225,25 +209,46 @@ def _build_function_memory_map(function_metadata):
}
func_entries.append(func_entry)

target_main_entries = list()
for i in range(num_targets):
target = main_func_metadata.workspace_sizes.items()[i][0]
main_func_local_workspace = main_func_metadata.workspace_sizes.items()[i][1]
main_func_constants = main_func_metadata.constant_sizes.items()[i][1]
main_func_io = main_func_metadata.io_sizes.items()[i][1]
target_main_entries.append(
{
"device": int(target.kind.device_type),
"workspace_size_bytes": int(device_max_workspace[target])
+ int(main_func_local_workspace),
"constants_size_bytes": int(main_func_constants),
"io_size_bytes": int(main_func_io),
}
target_main_entries = dict()

def _create_empty_entry(target_device_type):
return {
"device": int(target_device_type),
"workspace_size_bytes": 0,
"constants_size_bytes": 0,
"io_size_bytes": 0,
}

for target in dict(main_func_metadata.workspace_sizes).keys():
main_func_local_workspace = main_func_metadata.workspace_sizes[target]
target_main_entries[int(target.kind.device_type)] = _create_empty_entry(
int(target.kind.device_type)
)
target_main_entries[int(target.kind.device_type)]["workspace_size_bytes"] = int(
device_max_workspace.get(int(target.kind.device_type), 0)
) + int(main_func_local_workspace)

for target in dict(main_func_metadata.constant_sizes).keys():
if int(target.kind.device_type) not in target_main_entries.keys():
target_main_entries[int(target.kind.device_type)] = _create_empty_entry(
int(target.kind.device_type)
)
target_main_entries[int(target.kind.device_type)]["constants_size_bytes"] = int(
main_func_metadata.constant_sizes[target]
)

for target in dict(main_func_metadata.io_sizes).keys():
if int(target.kind.device_type) not in target_main_entries.keys():
target_main_entries[int(target.kind.device_type)] = _create_empty_entry(
int(target.kind.device_type)
)
target_main_entries[int(target.kind.device_type)]["io_size_bytes"] = int(
main_func_metadata.io_sizes[target]
)

ret = {
"operator_functions": func_entries,
"main": target_main_entries,
"main": list(target_main_entries.values()),
}
return ret

Expand Down
Loading