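Reference: https://discuss.tvm.apache.org/t/failures-using-many-of-onnx-model-zoo-models/10268

The model is supposed to be exported with a specific torch/torchvision version and then brought into TVM through the traced PyTorch (TorchScript) module rather than through ONNX.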

In [None]:
# !wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip
# !unzip PennFudanPed.zip
!pip uninstall -y torch torchvision
!pip install torch==1.7.0
!pip install torchvision==0.8.1

# Dependency

In [2]:
import os
import numpy as np
import torch
import torchvision.models as models
import torchvision
import onnx

import tvm
from tvm import relay, autotvm
from tvm.relay import testing
from tvm.runtime.vm import VirtualMachine
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
import tvm.contrib.graph_executor as runtime

import cv2
import time

# Config

In [2]:
# Define the neural network and compilation target.
# The default target is plain "llvm". To enable SIMD extensions replace it with
# "llvm -mcpu=core-avx2", or with "llvm -mcpu=skylake-avx512" if the target
# machine supports avx512 instructions.
class cfg:
    """Notebook-wide settings: model files, input description, TVM target and tuning options."""

    model_name = "mask_rcnn"
    model_path = f"ONNX_MODELS/{model_name}.onnx"
    input_name = "input0"

    use_sparse = False
    batch_size = 1
    input_shape = (3, 224, 224)
    output_shape = (batch_size, 801)
    dtype = "float32"
    layout = "NCHW"
    opset_version = 11
    target = "llvm"
#     graph_opt_sch_file = "..."
#     log_file = "..."
    # `target` is a plain string, which has no `.kind`; parse it into a
    # tvm.target.Target to obtain the kind name used in the file name.
    json_file = "TVM_FILES/%s-%s-B%d-%s.json" % (
        model_name,
        layout,
        batch_size,
        tvm.target.Target(target).kind.name,
    )
    # Set the number of threads used for tuning based on the number of
    # physical CPU cores on your machine.
    num_threads = 2
    dev = tvm.device(str(target), 0)

    # Settings added for the detection workflow.
    output_name = "output"
    image_dir = '~/PennFudanPed/PNGImages'
    # os.walk does not expand "~", so expand it here; otherwise the walk
    # silently yields nothing and image_paths stays empty.
    image_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(os.path.expanduser(image_dir))
        for file in files
    ]
    dynamic_model_path = f"ONNX_MODELS/dynamic_{model_name}.onnx"
    score_threshold = 0.9

# Exported through the environment so TVM picks up the thread count.
os.environ["TVM_NUM_THREADS"] = str(cfg.num_threads)

# export model

In [None]:
def dict_to_tuple(out_dict):
    """Flatten one detection-result dict into a tuple.

    Returns ``(boxes, scores, labels)``, with ``masks`` appended as a fourth
    element when the dict has a "masks" entry.
    """
    ordered_keys = ["boxes", "scores", "labels"]
    if "masks" in out_dict.keys():
        ordered_keys.append("masks")
    return tuple(out_dict[key] for key in ordered_keys)


class TraceWrapper(torch.nn.Module):
    """Wrap a model so its forward pass returns a flat tuple.

    The wrapped model is called with the input, element 0 of its output is
    taken, and that dict is converted with ``dict_to_tuple``.
    """

    def __init__(self, model):
        super(TraceWrapper, self).__init__()
        self.model = model

    def forward(self, inp):
        # Only the first entry of the model output is forwarded.
        first_result = self.model(inp)[0]
        return dict_to_tuple(first_result)
    
def do_trace(model, inp):
    """Trace ``model`` with the example input ``inp`` and return the traced module.

    The traced module is switched to eval mode before it is returned.
    """
    traced = torch.jit.trace(model, inp)
    traced.eval()
    return traced

In [None]:
# Build the pretrained Mask R-CNN detector and wrap it so that its forward
# pass returns a tuple, then switch it to eval mode.
model_func = torchvision.models.detection.maskrcnn_resnet50_fpn
pretrained_detector = model_func(pretrained=True)
model = TraceWrapper(pretrained_detector)
model.eval()

## onnx

In [3]:
data_shape = (cfg.batch_size,) + cfg.input_shape
input_data = torch.randn(data_shape)
input_names = [cfg.input_name]
output_names = [cfg.output_name]
# The input is NCHW, so axis 2 is the height and axis 3 the width.
# Only one output name is given, so it names the first value returned by the
# wrapper (the boxes); that tensor has no axes 2/3, so its leading axis is the
# one marked dynamic.
dynamic_axes = {
    cfg.input_name: {2: 'height', 3: 'width'},
    cfg.output_name: {0: 'num_detections'},
}

# Make sure the output directories exist before exporting.
for onnx_path in (cfg.model_path, cfg.dynamic_model_path):
    onnx_dir = os.path.dirname(onnx_path)
    if onnx_dir:
        os.makedirs(onnx_dir, exist_ok=True)

# static model
torch.onnx.export(model, input_data, cfg.model_path, input_names=input_names, verbose=True, opset_version=cfg.opset_version)
# `check_onnx_model` is not defined in this notebook; validate the exported
# file with the ONNX checker directly.
onnx.checker.check_model(onnx.load(cfg.model_path))

# dynamic model
torch.onnx.export(model, input_data, cfg.dynamic_model_path, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, opset_version=cfg.opset_version)
onnx.checker.check_model(onnx.load(cfg.dynamic_model_path))

# Loading tvm module from pytorch script model

In [4]:
data_shape = (cfg.batch_size,) + cfg.input_shape
# from_pytorch takes a list of (name, shape) pairs; from_onnx takes a dict.
shape_list = [(cfg.input_name, data_shape)]
# Was `{input_name: ...}`; `input_name` is undefined, the name lives on cfg.
shape_dict = {cfg.input_name: data_shape}
inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=data_shape))

with torch.no_grad():
    out = model(inp)
    script_module = do_trace(model, inp)

# pytorch
mod, params = relay.frontend.from_pytorch(script_module, shape_list)

## onnx fails
# onnx_model = onnx.load(cfg.model_path)
# mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict, dtype=cfg.dtype)

# Compiling TVM

In [None]:
# Run the Relay module once through a VM executor with random input.
# evaluate() is called inside the PassContext because compilation may be
# deferred until then; outside the context the pass options would not apply.
with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
    intrp = relay.build_module.create_executor("vm", mod, tvm.cpu(0), cfg.target)
    evaluate_fn = intrp.evaluate()

# torch tensors have no .astype(); generate the random input with NumPy.
random_input = np.random.randn(*data_shape).astype(cfg.dtype)
vm_result = evaluate_fn(tvm.nd.array(random_input), **params)

# A single tensor exposes .numpy(); a multi-output result is converted
# element by element.
if hasattr(vm_result, "numpy"):
    tvm_output = vm_result.numpy()
else:
    tvm_output = [item.numpy() for item in vm_result]

# # Alternative: build a graph-executor library instead of using the VM
# with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
#     lib = relay.build(mod, target=cfg.target, params=params)

In [24]:
# NOTE(review): `vm_exec` is not assigned anywhere above this cell; the only
# assignment visible in the notebook is a relay.vm.compile call in a later
# cell, so this cell depends on out-of-order execution.
vm_exec.primitive_ops

Module(VMExecutable, 35c55608)

In [12]:
# vm
# Compile the module for the Relay VM and create the runtime. These lines were
# commented out (and referred to undefined `target` / `dezv`), which left `vm`
# undefined on a fresh kernel.
with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
    vm_exec = relay.vm.compile(mod, target=cfg.target, params=params)

vm = VirtualMachine(vm_exec, cfg.dev)

# NOTE(review): `img` is produced by the image preprocessing cell in the
# "Inference" section; that cell has to be run before this one.
vm.set_input("main", **{cfg.input_name: img})
tvm_res = vm.run()

# Output 0 holds the boxes and output 1 the scores. Boxes are collected until
# the first score that is not above the threshold.
boxes = tvm_res[0].numpy().tolist()
scores = tvm_res[1].numpy().tolist()
valid_boxes = []
for box, score in zip(boxes, scores):
    if not score > cfg.score_threshold:
        break
    valid_boxes.append(box)

print("Get {} valid boxes".format(len(valid_boxes)))

Get 12 valid boxes


# Inference

In [9]:
from tvm.contrib.download import download_testdata

# Download (or reuse the cached copy of) the test image.
img_url = "https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/detection/street_small.jpg"
img_path = download_testdata(img_url, "test_street_small.jpg", module="data")

img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise IOError("Failed to read image: %s" % img_path)
img = img.astype(cfg.dtype)

# cv2.resize expects dsize as (width, height), while cfg.input_shape is
# (channels, height, width); passing input_shape[1:] directly only worked
# because the configured shape is square.
_, target_height, target_width = cfg.input_shape
img = cv2.resize(img, (target_width, target_height))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Scale to [0, 1], move channels first, then add the batch axis.
img = np.transpose(img / 255.0, [2, 0, 1])
img = np.expand_dims(img, axis=0)

File /home/luhsuanwen/.tvm_test_data/data/test_street_small.jpg exists, skip.


In [None]:
def _build_graph_lib():
    """Build the Relay module for cfg.target at opt_level 3 and return the library."""
    with tvm.transform.PassContext(opt_level=3):
        return relay.build_module.build(mod, target=cfg.target, params=params)


# `graph_opt_sch_file` is commented out in cfg, so reading it directly raises
# AttributeError. Apply the graph-tuning records only when the attribute is
# set and the file exists; otherwise build without them.
graph_opt_sch_file = getattr(cfg, "graph_opt_sch_file", None)
print("Compile...")
if graph_opt_sch_file and os.path.isfile(graph_opt_sch_file):
    with autotvm.apply_graph_best(graph_opt_sch_file):
        lib = _build_graph_lib()
else:
    lib = _build_graph_lib()

# Create the graph executor on the configured device.
module = runtime.GraphModule(lib["default"](cfg.dev))

def inference(img):
    """Run the graph executor ``module`` on one image and return output 0.

    ``img`` is cast to ``cfg.dtype`` and bound to the input named
    ``cfg.input_name`` before the module is run.
    """
    module.set_input(cfg.input_name, tvm.nd.array(img.astype(cfg.dtype)))
    module.run()
    # Was `m.get_output(0)`, but `m` is never defined; the executor is `module`.
    return module.get_output(0)

inference(img)

# Auto Schedule

In [26]:
from tvm import relay, auto_scheduler
from tvm.relay import data_dep_optimization as ddo
import tvm.relay.testing
from tvm.contrib import graph_executor

In [None]:

def get_tvm_module_N_params(
    model_path,
    input_name="input.1",
    batch_size=1,
    input_shape=(3, 224, 224),
    layout="NCHW",
    dtype="float32",
    use_sparse=False
):
    """Return the ``(mod, params)`` pair used for auto-scheduler task extraction.

    The module is obtained from ``get_network_from_onnx`` and then passed
    through ``convert_layout``.
    NOTE(review): neither helper is defined in the visible notebook; presumably
    they wrap ``relay.frontend.from_onnx`` and a layout-conversion pass
    (the auto-scheduler prefers NHWC) — confirm.

    Args:
        model_path: path handed to ``get_network_from_onnx``.
        input_name: name of the model input.
        batch_size: leading dimension prepended to ``input_shape``.
        input_shape: per-sample input shape.
        layout: layout of the source model; only "NCHW" is accepted.
        dtype: dtype handed to ``get_network_from_onnx``.
        use_sparse: when true, convert dense weights to sparse ones with
            random parameters.

    Raises:
        AssertionError: if ``layout`` is not "NCHW".
    """
    data_shape = (batch_size,) + tuple(input_shape)
    # The default used to be "NHWC", which this check always rejected; the
    # default is now the only accepted value.
    assert layout == "NCHW", "only NCHW source models are supported, got %r" % (layout,)
    mod, params = get_network_from_onnx(model_path, input_name, data_shape, dtype)
    mod = convert_layout(mod)

    if use_sparse:
        from tvm.topi.sparse.utils import convert_model_dense_to_sparse
        mod, params = convert_model_dense_to_sparse(mod, params, bs_r=4, random_params=True)
    return mod, params

def run_tuning(tasks, task_weights, json_file, trials=1000, use_sparse=False):
    """Tune ``tasks`` with the auto-scheduler and record the results in ``json_file``.

    Args:
        tasks: tasks handed to ``auto_scheduler.TaskScheduler``.
        task_weights: weights handed to ``auto_scheduler.TaskScheduler``.
        json_file: path of the record file written by ``RecordToFile``.
        trials: number of measurement trials (``num_measure_trials``).
        use_sparse: when true, tune with one sketch policy per task that uses
            the sparse sketch rules.
    """
    print("Begin tuning...")

    # Create the record file's directory up front so the measure callback has
    # somewhere to write.
    record_dir = os.path.dirname(json_file)
    if record_dir:
        os.makedirs(record_dir, exist_ok=True)

    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=trials,  # change this to 20000 to achieve the best performance
        runner=auto_scheduler.LocalRunner(repeat=10, enable_cpu_cache_flush=True),
        measure_callbacks=[auto_scheduler.RecordToFile(json_file)],
    )

    if not use_sparse:
        tuner.tune(tune_option)
        return

    from tvm.topi.sparse.utils import sparse_sketch_rules

    search_policy = [
        auto_scheduler.SketchPolicy(
            task,
            program_cost_model=auto_scheduler.XGBModel(),
            init_search_callbacks=sparse_sketch_rules(),
        )
        for task in tasks
    ]
    tuner.tune(tune_option, search_policy=search_policy)


In [None]:
# Extract tasks from the network
print("Get module...")
mod, params = get_tvm_module_N_params(
    cfg.model_path,
    input_name=cfg.input_name,
    batch_size=cfg.batch_size,  # the missing comma here was a SyntaxError
    input_shape=cfg.input_shape,
    layout=cfg.layout,
    dtype=cfg.dtype,
    use_sparse=cfg.use_sparse,
)
print("Extract tasks...")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, cfg.target)

for idx, task in enumerate(tasks):
    print("========== Task %d  (workload key: %s) ==========" % (idx, task.workload_key))
    print(task.compute_dag)


# Forward use_sparse so tuning uses the same setting as the module above.
run_tuning(tasks, task_weights, cfg.json_file, use_sparse=cfg.use_sparse)

Get errors with GraphExecutorCodegen for task extraction. Fallback to VMCompiler.
