-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Closed
Labels
Labels: needs-triage (PRs or issues that need to be investigated by maintainers to find the right assignees to address it); type: bug
Description
This subgraph fails in the rewrite_tensorize postprocessor while being tuned with MetaSchedule.
Actual behavior
1: operator()
at /home/yj/tvm/src/meta_schedule/postproc/rewrite_tensorize.cc:37
0: tvm::tir::ScheduleNode::GetSRef(tvm::tir::StmtNode const*) const
at /home/yj/tvm/src/tir/schedule/schedule.cc:36
File "/home/yj/tvm/src/support/parallel_for.cc", line 128
RuntimeError: parallel_for_dynamic error with [16:44:30] /home/yj/tvm/src/tir/schedule/schedule.cc:36: IndexError: The stmt doesn't exist in the IR
Environment
TVM git hash #13404
Steps to reproduce
import tvm
from tvm.script import tir as T
from tvm.topi.x86.utils import target_has_vnni
# fmt: off
# Reproduction IRModule (TVMScript). The single prim_func computes, in float16
# with int64 shapes: a copy of the input, a 1x1 conv2d in NCHW layout, a
# per-channel bias add, and a clip to [0, 6] (ReLU6-shaped clamp).
@tvm.script.ir_module
class Module:
    @T.prim_func
    def main(p0: T.Buffer[(T.int64(16), T.int64(16), T.int64(112), T.int64(112)), "float16"], p1: T.Buffer[(T.int64(96), T.int64(16), T.int64(1), T.int64(1)), "float16"], p2: T.Buffer[(T.int64(1), T.int64(96), T.int64(1), T.int64(1)), "float16"], compute: T.Buffer[(T.int64(16), T.int64(96), T.int64(112), T.int64(112)), "float16"]):
        # function attr dict
        T.func_attr({"global_symbol": "main", "tir.noalias": True})
        # body
        # with T.block("root")
        # Intermediate buffers: padded input, conv output, bias-added output.
        pad_temp = T.alloc_buffer([T.int64(16), T.int64(16), T.int64(112), T.int64(112)], dtype="float16")
        conv2d_nchw = T.alloc_buffer([T.int64(16), T.int64(96), T.int64(112), T.int64(112)], dtype="float16")
        T_add = T.alloc_buffer([T.int64(16), T.int64(96), T.int64(112), T.int64(112)], dtype="float16")
        # "pad_temp": a plain element-wise copy of p0 — with a 1x1 kernel no
        # real border padding is needed, so the pad stage degenerates to identity.
        for i0, i1, i2, i3 in T.grid(T.int64(16), T.int64(16), T.int64(112), T.int64(112)):
            with T.block("pad_temp"):
                i0_1, i1_1, i2_1, i3_1 = T.axis.remap("SSSS", [i0, i1, i2, i3])
                T.reads(p0[i0_1, i1_1, i2_1, i3_1])
                T.writes(pad_temp[i0_1, i1_1, i2_1, i3_1])
                pad_temp[i0_1, i1_1, i2_1, i3_1] = p0[i0_1, i1_1, i2_1, i3_1]
        # "conv2d_nchw": direct convolution. Spatial axes (nn, ff, yy, xx) and
        # reduction axes (rc, ry, rx); ry/rx iterate a single point (1x1 kernel).
        for i0, i1, i2, i3, i4, i5, i6 in T.grid(T.int64(16), T.int64(96), T.int64(112), T.int64(112), T.int64(16), T.int64(1), T.int64(1)):
            with T.block("conv2d_nchw"):
                nn, ff, yy, xx, rc, ry, rx = T.axis.remap("SSSSRRR", [i0, i1, i2, i3, i4, i5, i6])
                T.reads(pad_temp[nn, rc, yy + ry, xx + rx], p1[ff, rc, ry, rx])
                T.writes(conv2d_nchw[nn, ff, yy, xx])
                with T.init():
                    # Accumulator initialized to zero before reduction.
                    conv2d_nchw[nn, ff, yy, xx] = T.float16(0)
                conv2d_nchw[nn, ff, yy, xx] = conv2d_nchw[nn, ff, yy, xx] + pad_temp[nn, rc, yy + ry, xx + rx] * p1[ff, rc, ry, rx]
        # "T_add": broadcast bias add — p2 is indexed only by the channel axis.
        for i0, i1, i2, i3 in T.grid(T.int64(16), T.int64(96), T.int64(112), T.int64(112)):
            with T.block("T_add"):
                ax0, ax1, ax2, ax3 = T.axis.remap("SSSS", [i0, i1, i2, i3])
                T.reads(conv2d_nchw[ax0, ax1, ax2, ax3], p2[T.int64(0), ax1, T.int64(0), T.int64(0)])
                T.writes(T_add[ax0, ax1, ax2, ax3])
                T_add[ax0, ax1, ax2, ax3] = conv2d_nchw[ax0, ax1, ax2, ax3] + p2[T.int64(0), ax1, T.int64(0), T.int64(0)]
        # "compute": clamp each element into [0, 6] via min/max.
        for i0, i1, i2, i3 in T.grid(T.int64(16), T.int64(96), T.int64(112), T.int64(112)):
            with T.block("compute"):
                i0_2, i1_2, i2_2, i3_2 = T.axis.remap("SSSS", [i0, i1, i2, i3])
                T.reads(T_add[i0_2, i1_2, i2_2, i3_2])
                T.writes(compute[i0_2, i1_2, i2_2, i3_2])
                compute[i0_2, i1_2, i2_2, i3_2] = T.max(T.min(T_add[i0_2, i1_2, i2_2, i3_2], T.float16(6)), T.float16(0))
# fmt: on
import tempfile

from tvm import meta_schedule as ms

# Reproduce the crash: run MetaSchedule TIR tuning on the module defined
# above, targeting an RTX 3070, with 20000 global trials.
mod = Module
target = tvm.target.Target("nvidia/geforce-rtx-3070")

# A throwaway work directory keeps tuning logs from accumulating on disk;
# it is deleted automatically when the context manager exits.
with tempfile.TemporaryDirectory() as tuning_dir:
    ms.tir_integration.tune_tir(mod, target, tuning_dir, 20000)
Label: tune:meta_schedule
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
Labels: needs-triage (PRs or issues that need to be investigated by maintainers to find the right assignees to address it); type: bug