From 58244be9417aa6ba3cdb4feba44328e65ff5fad9 Mon Sep 17 00:00:00 2001 From: Egor Churaev <egor.churaev@gmail.com> Date: Tue, 5 Sep 2023 11:40:59 +0300 Subject: [PATCH 1/2] [VM][Adreno] Fix using buffers for weights in VM In VM `fn->attrs` doesn't contain information about `kernel_layout`. So we can get this value from `expr_attrib`. In this PR function `CanUseBuffers` was modified to work with VM. A new test which checks memory scope for VM was added. --- .../transforms/annotate_texture_storage.cc | 8 ++- .../test_conv2d_nchw_texture.py | 72 ++++++++++++++++--- .../opencl_texture/utils/adreno_utils.py | 18 ++--- 3 files changed, 73 insertions(+), 25 deletions(-) diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc index 4921cef4c8c2..01d47b69530b 100644 --- a/src/relay/transforms/annotate_texture_storage.cc +++ b/src/relay/transforms/annotate_texture_storage.cc @@ -174,8 +174,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) { std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call))); if (expr_attrib.as<Conv2DAttrs>() || expr_attrib.as<Conv2DWinogradAttrs>()) { + String kernel_layout = expr_attrib.as<Conv2DAttrs>() + ? 
expr_attrib.as<Conv2DAttrs>()->kernel_layout + : expr_attrib.as<Conv2DWinogradAttrs>()->kernel_layout; if ((i == weights_pos) && !ttype->dtype.is_float16() && - CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) { + CanUseBuffers(call->args[i], ttype->shape, kernel_layout)) { buffers_params.insert(fn->params[i]); buffers_args.insert(call->args[i]); scope = "global"; @@ -426,10 +429,9 @@ class StorageInfo : private transform::DeviceAwareExprVisitor { } bool CanUseBuffers(const Expr param, const Array<PrimExpr> shape, - const tvm::DictAttrs param_attrs) const { + const String kernel_layout) const { bool use_buffer = false; if (param.as<ConstantNode>() && shape.size() == 5) { - auto kernel_layout = param_attrs.GetAttr<String>("kernel_layout"); if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") { int a0 = shape[0].as<IntImmNode>()->value; int a1 = shape[1].as<IntImmNode>()->value; diff --git a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py index 3c9c3f2caf1e..5fd86adf6f69 100644 --- a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py @@ -692,7 +692,6 @@ def test_residual_block(remote, target, executor_type, dtype): {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -803,8 +802,6 @@ def test_concat(remote, target, executor_type, dtype): "", ] - static_memory_scope = [] - if executor_type == "ge": build_run_compare( remote, @@ -823,7 +820,6 @@ def test_concat(remote, target, executor_type, dtype): {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -968,7 +964,6 @@ def test_pooling_branching_texture_params(remote, target, executor_type, dtype): {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -1111,7 +1106,6 @@ def test_branching_texture_params(remote, target, executor_type, dtype): {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -1212,7 +1206,6 @@ def 
test_conv2d_different_lowering_same_op(remote, target, executor_type, dtype) {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -1380,7 +1373,6 @@ def test_injective_nwo_inputs1(remote, target, executor_type, dtype): {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -1495,7 +1487,6 @@ def test_injective_nwo_inputs2(remote, target, executor_type, dtype): {"data": input_shape}, {"data": dtype}, target, - static_memory_scope, ) @@ -1534,5 +1525,68 @@ def test_conv2d_to_3_channels(remote, target, executor_type, dtype): ) +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_weight_on_buffers(remote, target, executor_type, dtype): + target = "opencl -device=adreno" + input_shape = (1, 64, 75, 75) + filter_shape = (64, 64, 3, 3) + bias_shape = (64,) + A = relay.var("data", shape=input_shape, dtype=dtype) + W = relay.var("weight", shape=filter_shape, dtype=dtype) + BS = relay.var("bias", shape=bias_shape, dtype=dtype) + conv = relay.nn.conv2d(A, W, padding=[1, 1, 1, 1], channels=64, kernel_size=(3, 3)) + conv = relay.nn.bias_add(conv, BS) + conv = relay.op.nn.relu(conv) + + mod = relay.Function([A, W, BS], conv) + np.random.seed(0) + initializer = relay.testing.init.Xavier() + filter_data = np.zeros(filter_shape).astype(dtype) + bias_data = np.zeros(bias_shape).astype(dtype) + initializer("weight", filter_data) + initializer("bias", bias_data) + params1 = { + "weight": tvm.nd.array(filter_data), + "bias": tvm.nd.array(bias_data), + } + + if executor_type == "ge": + static_memory_scope = [ + "", + "global.texture", + "global", + "global.texture-weight", + "", + "", + ] + build_run_compare( + remote, + mod, + params1, + {"data": input_shape}, + {"data": dtype}, + target, + static_memory_scope, + ) + else: + static_memory_scope = """ + VM VirtualDevice[0]: device type 1, id 0 and mem_scope + VM VirtualDevice[1]: device type 4, id 0 and mem_scope + VM VirtualDevice[2]: 
device type 4, id 0 and mem_scope global.texture + VM VirtualDevice[3]: device type 4, id 0 and mem_scope global + VM VirtualDevice[4]: device type 4, id 0 and mem_scope global.texture-weight + """ + build_run_compare_vm( + remote, + mod, + params1, + {"data": input_shape}, + {"data": dtype}, + target, + static_memory_scope, + ) + + if __name__ == "__main__": tvm.testing.main() diff --git a/tests/python/relay/opencl_texture/utils/adreno_utils.py b/tests/python/relay/opencl_texture/utils/adreno_utils.py index 309243df1624..722922f544d8 100644 --- a/tests/python/relay/opencl_texture/utils/adreno_utils.py +++ b/tests/python/relay/opencl_texture/utils/adreno_utils.py @@ -161,19 +161,11 @@ def build_run_compare_vm( tvm_mod_nchwc, target=target, target_host=target_host, params=params1 ) - # TODO(echuraev): enable scope checking - ## verification that storage_scope has expected textures scopes - # graph_json = json.loads(graph) - # if "storage_scope" in graph_json["attrs"]: - # assert ( - # len(static_mem_scopes) == len(graph_json["attrs"]["storage_scope"][1]) - # or len(static_mem_scopes) == 0 - # ) - # else: - # assert len(static_mem_scopes) == 0 - - # for i in range(0, len(static_mem_scopes)): - # assert static_mem_scopes[i] == graph_json["attrs"]["storage_scope"][1][i] + if len(static_mem_scopes) > 0: + mem_scopes_lines = static_mem_scopes.strip().split('\n') + vm_lines = vmc._get_virtual_devices().strip().split('\n') + for i in range(0, len(mem_scopes_lines)): + assert mem_scopes_lines[i].strip() == vm_lines[i].strip() if remote is None: dev = tvm.opencl() From a9b41daa099e7614e748be3ab994ff1ff7ede7ae Mon Sep 17 00:00:00 2001 From: Egor Churaev Date: Wed, 6 Sep 2023 09:15:57 +0300 Subject: [PATCH 2/2] Fix ci --- .../python/relay/opencl_texture/test_conv2d_nchw_texture.py | 5 +++-- tests/python/relay/opencl_texture/utils/adreno_utils.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py 
b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py index 5fd86adf6f69..1dd5ca2abd00 100644 --- a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py @@ -789,11 +789,12 @@ def test_concat(remote, target, executor_type, dtype): static_memory_scope = [ "", + "global.texture", "global", "global.texture-weight", - "global.texture-weight", "global", - "global.texture-weight", + "global.texture-nhwc", + "global", "global.texture-weight", "", "", diff --git a/tests/python/relay/opencl_texture/utils/adreno_utils.py b/tests/python/relay/opencl_texture/utils/adreno_utils.py index 722922f544d8..d9e52f8847a7 100644 --- a/tests/python/relay/opencl_texture/utils/adreno_utils.py +++ b/tests/python/relay/opencl_texture/utils/adreno_utils.py @@ -162,8 +162,8 @@ def build_run_compare_vm( ) if len(static_mem_scopes) > 0: - mem_scopes_lines = static_mem_scopes.strip().split('\n') - vm_lines = vmc._get_virtual_devices().strip().split('\n') + mem_scopes_lines = static_mem_scopes.strip().split("\n") + vm_lines = vmc._get_virtual_devices().strip().split("\n") for i in range(0, len(mem_scopes_lines)): assert mem_scopes_lines[i].strip() == vm_lines[i].strip()