In [1]:
import numpy as np

from tvm import relay
from tvm.relay import testing
import tvm
from tvm import te
from tvm.contrib import graph_executor, graph_runtime
import tvm.testing

In [2]:
# Workload configuration: input is `batch_size` images of shape `image_shape`,
# output is one row of `num_class` values per image.
batch_size, num_class = 1, 1000
image_shape = (3, 224, 224)
data_shape = (batch_size, *image_shape)
out_shape = (batch_size, num_class)

# Fetch the Relay ResNet-18 test workload (IR module plus its parameter dict).
mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=batch_size)
# Alternative workload:
# mod, params = relay.testing.vgg.get_workload(batch_size=batch_size, num_layers=11)

# Dump the textual IR; pass show_meta_data=True to include the metadata section.
print(mod.astext(show_meta_data=False))

#[version = "0.0.5"]
def @main(%data: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %bn_data_gamma: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %bn_data_beta: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %bn_data_moving_mean: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %bn_data_moving_var: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %conv0_weight: Tensor[(64, 3, 7, 7), float32] /* ty=Tensor[(64, 3, 7, 7), float32] */, %bn0_gamma: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %bn0_beta: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %bn0_moving_mean: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %bn0_moving_var: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %stage1_unit1_bn1_gamma: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %stage1_unit1_bn1_beta: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %stage1_unit1_bn1_moving_mean: Tensor[(64), float32] /* ty=Tensor[(64), float32] */,

In [3]:
from tvm.relay.expr_functor import ExprMutator

class ScheduleConv2d(ExprMutator):
    """Relay mutator that tags call results with an ``on_device`` annotation for CUDA device 0.

    Every visited call is annotated except calls whose operator is ``nn.dropout``
    or ``nn.batch_norm``; those are returned as produced by the base mutator.
    Note that, despite the class name, the rewrite is not limited to conv2d calls.
    """

    def __init__(self):
        super().__init__()

    def visit_call(self, expr):
        """Rewrite ``expr`` via the base mutator, then annotate it unless its op is excluded."""
        # Delegate to ExprMutator.visit_call first so the annotation wraps the rewritten call.
        rewritten = super().visit_call(expr)

        should_annotate = (
            expr.op != tvm.relay.op.get("nn.dropout")
            and expr.op != tvm.relay.op.get("nn.batch_norm")
        )
        if not should_annotate:
            return rewritten
        return relay.annotation.on_device(rewritten, tvm.cuda(0))

In [4]:
# Rewrite every global function with a fresh ScheduleConv2d mutator, storing the
# result back into the module under the same global var.
# NOTE(review): this mutates `mod` in place, so re-running the cell annotates again.
for global_var in mod.get_global_vars():
    annotated_func = ScheduleConv2d().visit(mod[global_var])
    mod[global_var] = annotated_func

# Re-run type inference on the rewritten module.
infer_type = relay.transform.InferType()
mod = infer_type(mod)

In [5]:
# Display the module as it stands after the annotation / InferType cell.
mod

#[version = "0.0.5"]
def @main(%data: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %bn_data_gamma: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %bn_data_beta: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %bn_data_moving_mean: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %bn_data_moving_var: Tensor[(3), float32] /* ty=Tensor[(3), float32] */, %conv0_weight: Tensor[(64, 3, 7, 7), float32] /* ty=Tensor[(64, 3, 7, 7), float32] */, %bn0_gamma: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %bn0_beta: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %bn0_moving_mean: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %bn0_moving_var: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %stage1_unit1_bn1_gamma: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %stage1_unit1_bn1_beta: Tensor[(64), float32] /* ty=Tensor[(64), float32] */, %stage1_unit1_bn1_moving_mean: Tensor[(64), float32] /* ty=Tensor[(64), float32] */,

In [6]:
# Compile the annotated module for the Relay VM with one target per device kind.
# The pass context sets the fallback device type to CUDA's device type.
with tvm.transform.PassContext(
    opt_level=3,
    config={"relay.fallback_device_type": tvm.cuda().device_type},
):
    exe = relay.vm.compile(
        mod,
        target={
            "cpu": tvm.target.Target("llvm"),
            # NOTE(review): sm_86 is hard-coded; adjust for the GPU actually in use.
            "cuda": tvm.target.Target("cuda -arch=sm_86"),
        },
        params=params,
    )

One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


In [7]:
# Devices handed to the VM: CUDA device 0 and CPU device 0, one per target `exe` was compiled for.
devs = [tvm.cuda(0), tvm.cpu(0)]
# Instantiate the runtime virtual machine for the compiled executable on those devices.
vm = tvm.runtime.vm.VirtualMachine(exe, devs)

In [14]:
# Seed NumPy so the generated input is reproducible.
np.random.seed(0)

# Random float32 input in [-1, 1) with the configured data shape.
data = np.random.uniform(low=-1, high=1, size=data_shape).astype("float32")
p = list(params.values())  # NOTE(review): not referenced by any visible cell
# Place the input on the CUDA device and run "main" through the VM.
input_data = tvm.nd.array(data, tvm.cuda())
out = vm.invoke("main", input_data)

# Show the first 10 values of the flattened VM output.
print(out.numpy().flatten()[:10])

[0.00090203 0.0010247  0.00090637 0.00102216 0.00109492 0.00107084
 0.00105591 0.00095502 0.0011055  0.00113423]


In [17]:
# CPU-only reference run through the graph executor, used to validate the VM output.
# The fresh workload is bound to *_cpu names so the device-annotated `mod` / `params`
# from the earlier cells are not overwritten; re-running the VM compile cell after this
# one therefore still compiles the annotated module.
mod_cpu, params_cpu = relay.testing.resnet.get_workload(batch_size=batch_size, num_layers=18)
opt_level = 3
target = "llvm"
with tvm.transform.PassContext(opt_level=opt_level):
    lib = relay.build(mod_cpu, target, params=params_cpu)

# Create the graph-executor module on the CPU device.
dev = tvm.cpu()
module = graph_executor.GraphModule(lib["default"](dev))

# Feed the same `data` array that was given to the VM so both outputs are comparable.
module.set_input("data", data)
module.run()

# Copy output 0 into a freshly allocated array of shape `out_shape`.
out_cpu = module.get_output(0, tvm.nd.empty(out_shape)).numpy()

# Print first 10 elements of output
print(out_cpu.flatten()[0:10])

[0.00090203 0.0010247  0.00090637 0.00102216 0.00109492 0.00107084
 0.00105591 0.00095502 0.0011055  0.00113423]


In [13]:
# Check that the VM output matches the CPU graph-executor output within atol=1e-5.
tvm.testing.assert_allclose(out_cpu, out.numpy(), atol=1e-5)

In [None]:
# Benchmark the graph-executor module. It was built for the "llvm" target and
# instantiated on the CPU device, so the evaluator is given the CPU device rather
# than CUDA. The graph executor's "run" function takes no arguments, so the timer
# is called without any.
t2 = module.module.time_evaluator("run", tvm.cpu(0), number=1, repeat=10)
t2().mean

In [None]:
# Benchmark the heterogeneous VM.
# Bind the inputs explicitly: the raw "invoke" function reads inputs previously stored
# with set_input, so this cell should not depend on a leftover vm.invoke() call.
vm.set_input("main", input_data)

# Each timed call executes the whole "main" function (CPU and GPU parts alike), so a
# single measurement already covers the full run; adding a second measurement taken
# with a different device argument would count the run twice.
# NOTE(review): the device argument is assumed to select only which device the timer
# synchronises on, and CUDA is chosen so queued GPU work is included — confirm against
# the TVM time_evaluator documentation.
t1 = vm.module.time_evaluator("invoke", tvm.cuda(0), number=1, repeat=10)
res = t1("main").mean
res