In [1]:
import logging
import sys
import IPython

import numpy as np
import tvm
from tvm import te
import tvm.testing

# 模块名叫 `autotvm`
from tvm import autotvm

In [35]:
def matmul_basic(N, L, M, dtype):

    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)
    BIAS = te.const(0.5, "float32")

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    ADD_BIAS = te.compute((N, M), lambda i, j: C[i, j] + BIAS[i, j], name="ADD_BIAS")
    s = te.create_schedule(ADD_BIAS.op)

    # # 调度
    # y, x = s[C].op.axis
    # k = s[C].op.reduce_axis[0]

    # yo, yi = s[C].split(y, 8)
    # xo, xi = s[C].split(x, 8)

    # s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, BIAS, C, ADD_BIAS]

In [36]:
s, [A, B, BIAS, C, ADD_BIAS] = matmul_basic(1024, 1024, 1024, "float32")

TypeError: 'FloatImm' object is not subscriptable

In [18]:
IPython.display.Code(tvm.lower(s, [A, B, BIAS, C, ADD_BIAS]).script(), language="python")

In [3]:
# Matmul V1: 列出候选值
@autotvm.template("tutorial/matmul_v1")  # 1. 使用装饰器
def matmul_v1(N, L, M, dtype):
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # 调度
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    # 2. 获取 config 对象
    cfg = autotvm.get_config()

    # 3. 定义搜索空间
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])

    # 4. 根据 config 进行调度
    yo, yi = s[C].split(y, cfg["tile_y"].val)
    xo, xi = s[C].split(x, cfg["tile_x"].val)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [4]:
@autotvm.template("tutorial/matmul")
def matmul(N, L, M, dtype):
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # 调度
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    ##### 开始定义空间 #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    ##### 结束定义空间 #####

    # 根据 config 进行调度
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [5]:
N, L, M = 1024, 1024, 1024
task = autotvm.task.create("tutorial/matmul", args=(N, L, M, "float32"), target="llvm")
print(task.config_space)

ConfigSpace (len=121, range_length=121, space_map=
   0 tile_y: Split(policy=factors, product=1024, num_outputs=2) len=11
   1 tile_x: Split(policy=factors, product=1024, num_outputs=2) len=11
)


In [6]:
# 记录 config（为了将 tuning 日志打印到屏幕）
logging.getLogger("autotvm").setLevel(logging.DEBUG)
logging.getLogger("autotvm").addHandler(logging.StreamHandler(sys.stdout))

In [13]:
measure_option = autotvm.measure_option(builder="local", runner=autotvm.LocalRunner(number=5))

# 用 RandomTuner 开始调优, 日志记录到 `matmul.log` 文件中
# 可用 XGBTuner 来替代.
tuner = autotvm.tuner.RandomTuner(task)
tuner.tune(
    n_trial=200,
    measure_option=measure_option,
    callbacks=[autotvm.callback.log_to_file("matmul.log")],
)

waiting for device...
device available
Get devices for measurement successfully!
No: 1	GFLOPS: 0.00/0.00	result: Traceback (most recent call last):
  File "/home/yrx/projects/tvm/python/tvm/autotvm/measure/measure_methods.py", line 712, in __call__
    yield remote, remote.load_module(os.path.split(build_result.filename)[1])
  File "/home/yrx/projects/tvm/python/tvm/autotvm/measure/measure_methods.py", line 679, in run_through_rpc
    costs = time_f(*args).results
  File "/home/yrx/projects/tvm/python/tvm/runtime/module.py", line 401, in evaluator
    blob = feval(*args)
  File "/home/yrx/projects/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 239, in __call__
    raise_last_ffi_error()
  File "/home/yrx/projects/tvm/python/tvm/_ffi/base.py", line 481, in raise_last_ffi_error
    raise py_err
tvm.error.RPCSessionTimeoutError: Traceback (most recent call last):
  4: tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
  3: tvm::runtime::RP

KeyboardInterrupt: 

In [9]:
# 从日志文件中应用历史最佳
with autotvm.apply_history_best("matmul.log"):
    with tvm.target.Target("llvm"):
        s, arg_bufs = matmul(N, L, M, "float32")
        func = tvm.build(s, arg_bufs)

# 验证正确性
a_np = np.random.uniform(size=(N, L)).astype(np.float32)
b_np = np.random.uniform(size=(L, M)).astype(np.float32)
c_np = a_np.dot(b_np)

c_tvm = tvm.nd.empty(c_np.shape)
func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)

tvm.testing.assert_allclose(c_np, c_tvm.numpy(), rtol=1e-4)

Finish loading 50 records


In [12]:
dev = tvm.device("llvm", 0)
evaluator = func.time_evaluator(func.entry_name, dev, number=10)
mean_time = evaluator(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm).mean
print(mean_time)

0.0799122168
