In [1]:
import tvm
from tvm import te
import numpy as np
from tvm.contrib import graph_executor as runtime
from tvm import relay
from tvm.relay import testing
import tvm.testing

In [2]:
# Sizes shared by the network definition and by the code that feeds it data.
batch_size = 1
out_channels = 16
data_shape = (batch_size, 3, 224, 224)

# Symbolic inputs. Only `data` is given an explicit tensor type; the remaining
# variables are declared by name alone.
data = relay.var("data", relay.TensorType(data_shape, "float32"))
weight = relay.var("weight")
bn_gamma = relay.var("bn_gamma")
bn_beta = relay.var("bn_beta")
# Note: the Python identifiers below (bn_mmean / bn_mvar) differ from the
# strings handed to relay.var ("bn_mean" / "bn_var").
bn_mmean = relay.var("bn_mean")
bn_mvar = relay.var("bn_var")

# conv2d (3x3 kernel, padding 1) -> batch_norm -> relu
conv_out = relay.nn.conv2d(
    data=data,
    weight=weight,
    kernel_size=(3, 3),
    channels=out_channels,
    padding=(1, 1),
)
# Only element 0 of the batch_norm result is passed on to relu.
bn_out = relay.nn.batch_norm(conv_out, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0]
relu_out = relay.nn.relu(bn_out)

# Wrap the expression in a Function whose parameters are its free variables.
simple_net = relay.Function(relay.analysis.free_vars(relu_out), relu_out)

# create_workload returns a pair, unpacked here as (net, params).
net, params = testing.create_workload(simple_net)

In [10]:
import logging

# DEBUG-level logging, to dump TVM IR after fusion.
logging.basicConfig(level=logging.DEBUG)

# Build the workload for the "llvm" target.
target = "llvm"
lib = relay.build_module.build(net, target=target, params=params)

# Instantiate the built library's "default" entry on device 0 and wrap it in a
# graph executor module.
dev = tvm.device(target, 0)
module = runtime.GraphModule(lib["default"](dev))

# Random float32 input drawn uniformly between -1 and 1. This rebinds `data`,
# which held the Relay input variable in the network-definition cell.
data = np.random.uniform(low=-1, high=1, size=data_shape).astype("float32")
module.set_input("data", data)
module.run()

# Fetch output 0 into a freshly allocated buffer, then convert it to NumPy.
out_shape = (batch_size, out_channels, 224, 224)
out = module.get_output(0, tvm.nd.empty(out_shape))
out_llvm = out.numpy()

DEBUG:autotvm:Finish loading 35 records
INFO:te_compiler:Using injective.cpu for add based on highest priority (10)
INFO:te_compiler:Using injective.cpu for sqrt based on highest priority (10)
INFO:te_compiler:Using injective.cpu for divide based on highest priority (10)
INFO:te_compiler:Using injective.cpu for multiply based on highest priority (10)
INFO:te_compiler:Using injective.cpu for expand_dims based on highest priority (10)
INFO:te_compiler:Using injective.cpu for negative based on highest priority (10)
INFO:te_compiler:Using injective.cpu for multiply based on highest priority (10)
INFO:te_compiler:Using injective.cpu for add based on highest priority (10)
INFO:te_compiler:Using injective.cpu for expand_dims based on highest priority (10)
INFO:te_compiler:Using conv2d_nchw.x86 for nn.conv2d based on highest priority (10)
INFO:te_compiler:Using injective.cpu for multiply based on highest priority (10)
INFO:te_compiler:Using injective.cpu for add based on highest priority (10)


In [9]:
# Show the result computed by the llvm build (the saved repr below is abbreviated with "...").
# NOTE(review): this cell's execution count (9) is lower than the build cell's (10),
# so the saved output may come from an earlier run — re-run after rebuilding to confirm.
out_llvm

array([[[[0.        , 0.1376777 , 0.6680558 , ..., 0.        ,
          0.01324231, 0.        ],
         [0.17444447, 0.        , 0.        , ..., 0.        ,
          0.        , 0.07642334],
         [0.        , 0.        , 0.36327475, ..., 0.42015466,
          0.34397477, 0.2568761 ],
         ...,
         [0.39279926, 0.        , 0.44549635, ..., 0.        ,
          0.        , 0.        ],
         [0.13364975, 0.5340484 , 0.        , ..., 0.40979537,
          0.        , 0.        ],
         [0.08624469, 0.2057824 , 0.        , ..., 0.        ,
          0.1536131 , 0.194868  ]],

        [[0.        , 0.        , 0.        , ..., 0.        ,
          0.26647976, 0.        ],
         [0.        , 0.07988246, 0.21923622, ..., 0.26070654,
          0.05948642, 0.02425401],
         [0.        , 0.        , 0.3036386 , ..., 0.        ,
          0.10459031, 0.        ],
         ...,
         [0.0955115 , 0.        , 0.3549588 , ..., 0.        ,
          0.5713969 , 0. 