# Model Benchmark
This notebook shows you a total inference time of Mask RCNN ResNet50 model from an image loading to postprocessing

In [1]:
import time
import requests

from io import BytesIO
from PIL import Image

import torch
import torchvision
import transforms as T

import ray

In [2]:
# Setup for test 

# Model Loading
model = torchvision.models.detection.__dict__[
                "maskrcnn_resnet50_fpn"](pretrained=True).cuda().eval()

# Image Loading
test_image_bytes = requests.get(
        "http://farm8.staticflickr.com/7353/9879082044_66c4f5a6fb_z.jpg"
    ).content
image_payload_bytes = test_image_bytes
pil_image = Image.open(BytesIO(image_payload_bytes))
pil_images = [pil_image]

# Image Preprocessing
input_tensor = torch.cat(
                [T.ToTensor()(i)[0] for i in pil_images]).cuda()

# Inference
output_tensor = model([input_tensor])

# Prediction Postprocessing
result = {}
for k in output_tensor[0]:
    result[k] = output_tensor[0][k].cpu().detach().numpy()

In [3]:
%%timeit
# Image Loading
test_image_bytes = requests.get(
        "http://farm8.staticflickr.com/7353/9879082044_66c4f5a6fb_z.jpg"
    ).content
image_payload_bytes = test_image_bytes
pil_image = Image.open(BytesIO(image_payload_bytes))
pil_images = [pil_image]

44 ms ± 1.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [4]:
%%timeit
# Image Preprocessing
input_tensor = torch.cat(
                [T.ToTensor()(i)[0] for i in pil_images]).cuda()

3.15 ms ± 432 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
%%timeit
# Inference
with torch.no_grad():
    output_tensor = model([input_tensor])


72.7 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
%%timeit
# Prediction Postprocessing
result = {}
for k in output_tensor[0]:
    result[k] = output_tensor[0][k].cpu().detach().numpy()

402 µs ± 7.59 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [7]:
%%timeit
# test all
test_image_bytes = requests.get(
        "http://farm8.staticflickr.com/7353/9879082044_66c4f5a6fb_z.jpg"
    ).content
image_payload_bytes = test_image_bytes
pil_image = Image.open(BytesIO(image_payload_bytes))
pil_images = [pil_image]
input_tensor = torch.cat(
                [T.ToTensor()(i)[0] for i in pil_images]).cuda()
output_tensor = model([input_tensor])
result = {}
for k in output_tensor[0]:
    result[k] = output_tensor[0][k].cpu().detach().numpy()

154 ms ± 7.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
