In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Matplotlib is building the font cache; this may take a moment.



Compile PyTorch Object Detection Models
=======================================
This article is an introductory tutorial on deploying PyTorch object
detection models with the Relay VM.

To begin, PyTorch must be installed.
TorchVision is also required, since we will use it as our model zoo.

A quick solution is to install both via pip:

.. code-block:: bash

    pip install torch==1.7.0
    pip install torchvision==0.8.1

Alternatively, refer to the official installation guide at
https://pytorch.org/get-started/locally/

PyTorch versions should be backwards compatible but should be used
with the proper TorchVision version.

Currently, TVM supports PyTorch 1.7 and 1.4. Other versions may
be unstable.



In [3]:
!pip install opencv-python
!pip install torch==1.7.0
!pip install torchvision==0.8.1

Collecting torch==1.7.0
  Downloading torch-1.7.0-cp37-cp37m-manylinux1_x86_64.whl (776.7 MB)
[K     |████████████████████████████████| 776.7 MB 997 bytes/s  0:00:0101
Collecting dataclasses
  Downloading dataclasses-0.6-py3-none-any.whl (14 kB)
Installing collected packages: dataclasses, torch
Successfully installed dataclasses-0.6 torch-1.7.0
Collecting torchvision==0.8.1
  Downloading torchvision-0.8.1-cp37-cp37m-manylinux1_x86_64.whl (12.7 MB)
[K     |████████████████████████████████| 12.7 MB 20.5 MB/s eta 0:00:01
Installing collected packages: torchvision
Successfully installed torchvision-0.8.1


In [4]:
import tvm
from tvm import relay
from tvm import relay
from tvm.runtime.vm import VirtualMachine
from tvm.contrib.download import download

import numpy as np
import cv2

# PyTorch imports
import torch
import torchvision

Load pre-trained maskrcnn from torchvision and do tracing
---------------------------------------------------------



In [5]:
# Side length, in pixels, of the square input used for tracing and inference.
in_size = 300

# Shape of the single-image, 3-channel input; also declared to the Relay frontend.
input_shape = (1, 3, in_size, in_size)


def do_trace(model, inp):
    """Trace ``model`` with ``torch.jit.trace`` and return it in eval mode.

    Parameters
    ----------
    model
        The model to record.
    inp
        Example input handed to the tracer.

    Returns
    -------
    The traced module, after ``eval()`` has been called on it.
    """
    traced = torch.jit.trace(model, inp)
    # Switch the traced module to inference mode before handing it back.
    traced.eval()
    return traced


def dict_to_tuple(out_dict):
    """Flatten a detection output dict into a tuple.

    Returns ``(boxes, scores, labels)``, with ``masks`` appended as a fourth
    element when the dict contains a ``"masks"`` entry.
    """
    fields = ["boxes", "scores", "labels"]
    if "masks" in out_dict:
        fields.append("masks")
    return tuple(out_dict[name] for name in fields)


class TraceWrapper(torch.nn.Module):
    """Wrap a detection model so its forward pass returns a flat tuple.

    The wrapped model's output is indexed at position 0 and the resulting
    dict is converted with ``dict_to_tuple``.
    """

    def __init__(self, model):
        """Store ``model`` as the ``model`` submodule."""
        super().__init__()
        self.model = model

    def forward(self, inp):
        """Run the wrapped model on ``inp`` and flatten its first output entry."""
        detections = self.model(inp)
        first_entry = detections[0]
        return dict_to_tuple(first_entry)


# Build the pretrained torchvision Mask R-CNN and wrap it so that the forward
# pass returns a flat tuple (via TraceWrapper) rather than a dict.
model_func = torchvision.models.detection.maskrcnn_resnet50_fpn
model = TraceWrapper(model_func(pretrained=True))

model.eval()
# Random example input used to drive the tracer. Reuse input_shape so the
# traced shape and the shape later declared to Relay cannot drift apart.
inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=input_shape))

with torch.no_grad():
    # Eager forward pass before tracing; `out` is not read by the cells shown here.
    out = model(inp)
    script_module = do_trace(model, inp)

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /home/ubuntu/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth


HBox(children=(FloatProgress(value=0.0, max=178090079.0), HTML(value='')))




  dtype=torch.float32)).float())) for i in range(dim)]
  torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]
  boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
  boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
  for s, s_orig in zip(new_size, original_size)
  return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)


Download a test image and pre-process
-------------------------------------



In [6]:
img_path = "test_street_small.jpg"
img_url = (
    "https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/detection/street_small.jpg"
)
download(img_url, img_path)

# cv2.imread signals failure by returning None instead of raising, so check
# explicitly to get a clear error rather than an AttributeError on .astype.
img_bgr = cv2.imread(img_path)
if img_bgr is None:
    raise IOError("Failed to read image from {}".format(img_path))

img = img_bgr.astype("float32")
img = cv2.resize(img, (in_size, in_size))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Divide by 255 and move the channel axis first (HWC -> CHW), then add a
# leading batch axis so the array matches input_shape.
img = np.transpose(img / 255.0, [2, 0, 1])
img = np.expand_dims(img, axis=0)

Downloading from url https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/detection/street_small.jpg to test_street_small.jpg
...100%, 0.12 MB, 936 KB/s, 0 seconds passed


Import the graph to Relay
-------------------------



In [7]:
# Name given to the model input; the same name is used later when feeding
# the image to the VM.
input_name = "input0"
# (name, shape) pairs describing the inputs of the traced module.
shape_list = [(input_name, input_shape)]
# Import the traced module into Relay, yielding a module and its parameters.
mod, params = relay.frontend.from_pytorch(script_module, shape_list)

...100%, 0.02 MB, 240 KB/s, 0 seconds passed




Compile with Relay VM
---------------------
Note: Currently only the CPU target is supported. For x86 targets, it is
highly recommended to build TVM with Intel MKL and Intel OpenMP to get the
best performance, due to the large dense operators in
torchvision R-CNN models.



In [8]:
# Add "-libs=mkl" to the target to get the best performance on x86.
# For an x86 machine that supports AVX512, the complete target is
# "llvm -mcpu=skylake-avx512 -libs=mkl".
target = "llvm"

# Compile for the Relay VM at opt_level 3 with the FoldScaleAxis pass disabled.
# NOTE(review): the reason FoldScaleAxis is disabled is not stated here; confirm.
with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
    vm_exec = relay.vm.compile(mod, target=target, params=params)









Inference with Relay VM
-----------------------



In [9]:
# Run on the CPU device.
dev = tvm.cpu()
vm = VirtualMachine(vm_exec, dev)
# Bind the preprocessed image to the "main" function under the chosen input name.
vm.set_input("main", **{input_name: img})
# Execute; the result is indexed afterwards as [0] = boxes and [1] = scores.
tvm_res = vm.run()

Get boxes with score larger than 0.9
------------------------------------



In [None]:
score_threshold = 0.9
boxes = tvm_res[0].asnumpy().tolist()
scores = tvm_res[1].asnumpy().tolist()

# Keep every box whose score exceeds the threshold. Scanning all scores, rather
# than stopping at the first low one, avoids relying on the detections being
# sorted by descending score; the result is identical when they are sorted.
valid_boxes = [box for box, score in zip(boxes, scores) if score > score_threshold]

print("Get {} valid boxes".format(len(valid_boxes)))