In [1]:
from urllib.request import urlopen

from PIL import Image
import timm
import torch
import torch._dynamo

# Inputs a reader may want to swap out: the sample image and the timm checkpoint.
IMAGE_URL = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
MODEL_NAME = 'convnext_xxlarge.clip_laion2b_soup_ft_in1k'

# Decode the image while the HTTP response is still open, then let the `with`
# block close the response. convert('RGB') forces the pixel data to be read and
# guarantees a 3-channel image (PNG files may carry an alpha channel or palette).
with urlopen(IMAGE_URL) as response:
    img = Image.open(response).convert('RGB')

# eval() puts the module in inference mode and returns the module itself.
model = timm.create_model(MODEL_NAME, pretrained=True).eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

  from .autonotebook import tqdm as notebook_tqdm


## Compile to ONNX Runtime

In [16]:
# Every compiled variant in this notebook wraps the same `model`. Clear
# TorchDynamo's cached state first so this backend is compiled from scratch and
# re-running the cell without a kernel restart gives the same result.
torch._dynamo.reset()
onnxrt_model = torch.compile(model, backend='onnxrt')

In [17]:
%%timeit
output = onnxrt_model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

1.22 s ± 184 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Compile to CUDA Graphs

In [18]:
# `model` was already compiled with another backend above; reset TorchDynamo so
# this backend is compiled from scratch instead of reusing cached state.
torch._dynamo.reset()
# NOTE(review): no visible cell moves `model` or the input batch to a CUDA
# device - confirm the intended device, since CUDA graphs target GPU execution.
cuda_model = torch.compile(model, backend='cudagraphs')

In [19]:
%%timeit
output = cuda_model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

1.24 s ± 4.33 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Torch Inductor Backend

In [20]:
# `model` was already compiled with other backends above; reset TorchDynamo so
# the Inductor backend is compiled from scratch instead of reusing cached state.
torch._dynamo.reset()
inductor_model = torch.compile(model, backend='inductor')

In [21]:
%%timeit
output = inductor_model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

832 ms ± 1.89 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## TVM backend

In [22]:
# `model` was already compiled with other backends above; reset TorchDynamo so
# the TVM backend is compiled from scratch instead of reusing cached state.
torch._dynamo.reset()
tvm_model = torch.compile(model, backend='tvm')

In [23]:
%%timeit
output = tvm_model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

859 ms ± 44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Max autotune

In [24]:
# `model` was already compiled with Inductor in its default mode above; reset
# TorchDynamo so the max-autotune configuration is compiled from scratch rather
# than reusing the earlier compilation.
torch._dynamo.reset()
max_auto_model = torch.compile(model, backend='inductor', mode='max-autotune')

In [25]:
%%timeit
output = max_auto_model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

847 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
