In [1]:
import logging
import sys

import numpy as np
import tvm
from tvm import te
import tvm.testing

# the module is called `autotvm`
from tvm import autotvm

In [2]:
def matmul_basic(N, L, M, dtype):

    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    yo, yi = s[C].split(y, 8)
    xo, xi = s[C].split(x, 8)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [3]:
# Matmul V1: List candidate values
@autotvm.template("tutorial/matmul_v1")  # 1. use a decorator
def matmul_v1(N, L, M, dtype):
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    # 2. get the config object
    cfg = autotvm.get_config()

    # 3. define search space
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])

    # 4. schedule according to config
    yo, yi = s[C].split(y, cfg["tile_y"].val)
    xo, xi = s[C].split(x, cfg["tile_x"].val)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [4]:
@autotvm.template("tutorial/matmul")
def matmul(N, L, M, dtype):
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    ##### define space end #####

    # schedule according to config
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [6]:
N, L, M = 512, 512, 512
task = autotvm.task.create("tutorial/matmul", args=(N, L, M, "float32"), target="llvm")
print(task.config_space)

ConfigSpace (len=100, space_map=
   0 tile_y: Split(policy=factors, product=512, num_outputs=2) len=10
   1 tile_x: Split(policy=factors, product=512, num_outputs=2) len=10
)


In [7]:
# logging config (for printing tuning log to the screen)
logging.getLogger("autotvm").setLevel(logging.DEBUG)
logging.getLogger("autotvm").addHandler(logging.StreamHandler(sys.stdout))

In [8]:
measure_option = autotvm.measure_option(builder="local", runner=autotvm.LocalRunner(number=5))

# Begin tuning with RandomTuner, log records to file `matmul.log`
# You can use alternatives like XGBTuner.
tuner = autotvm.tuner.RandomTuner(task)
tuner.tune(
    n_trial=10,
    measure_option=measure_option,
    callbacks=[autotvm.callback.log_to_file("matmul.log")],
)

waiting for device...
device available
Get devices for measurement successfully!
No: 1	GFLOPS: 43.56/43.56	result: MeasureResult(costs=(0.0061617288,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.2046210765838623, timestamp=1658199544.5570998)	[('tile_y', [-1, 1]), ('tile_x', [-1, 128])],None,70
No: 2	GFLOPS: 7.24/43.56	result: MeasureResult(costs=(0.0370627438,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6762516498565674, timestamp=1658199545.3074584)	[('tile_y', [-1, 256]), ('tile_x', [-1, 8])],None,38
No: 3	GFLOPS: 35.04/43.56	result: MeasureResult(costs=(0.0076614126000000005,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.2152712345123291, timestamp=1658199545.5946825)	[('tile_y', [-1, 64]), ('tile_x', [-1, 256])],None,86
No: 4	GFLOPS: 2.51/43.56	result: MeasureResult(costs=(0.10684140580000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8439440727233887, timestamp=1658199547.5213277)	[('tile_y', [-1, 2]), ('tile_x', [-1, 1])],None,1
No: 5	GFLOPS: 36.82/43.56	result: Measu

In [10]:
def eval_op(s, vars, tgt, name, opt, log) -> None:
    func = tvm.build(s, vars, target=tgt, name=name)
    assert func
    
    dev = tvm.device(tgt.kind.name, 0)
    c = tvm.nd.array(np.zeros((M, N), dtype), dev)
    func(a, b, c)
    tvm.testing.assert_allclose(c.numpy(), answer, rtol=1e-5)
    
    evalor = func.time_evaluator(func.entry_name, dev, number=1)
    mean_time = evalor(a, b, c).mean
    print('%s: %f' % (opt, mean_time))
    log.append((opt, mean_time))
    
log = []

NameError: name 'A' is not defined

In [11]:
# apply history best from log file
with autotvm.apply_history_best("matmul.log"):
    with tvm.target.Target("llvm"):
        s, arg_bufs = matmul(N, L, M, "float32")
        func = tvm.build(s, arg_bufs)
        eval_op(s, [A, B, C], target,'matmul', 'AutoTVM', log)

# check correctness
a_np = np.random.uniform(size=(N, L)).astype(np.float32)
b_np = np.random.uniform(size=(L, M)).astype(np.float32)
c_np = a_np.dot(b_np)

c_tvm = tvm.nd.empty(c_np.shape)
func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)

tvm.testing.assert_allclose(c_np, c_tvm.numpy(), rtol=1e-4)

Finish loading 10 records
