In [1]:
!pip install -Uq timm onnx onnxruntime

## PyTorch Inference

In [123]:
from urllib.request import urlopen
from PIL import Image
import timm
import torch

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model('efficientvit_b0.r224_in1k', pretrained=True)
model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)



In [125]:
%%timeit
output = model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

12.9 ms ± 498 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [126]:
top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5)

In [127]:
top5_probabilities

tensor([[42.1539,  9.7981,  9.3126,  5.4653,  4.5588]],
       grad_fn=<TopkBackward0>)

In [128]:
top5_class_indices

tensor([[928, 551, 969, 967, 505]])

## Convert To ONNX

In [129]:
from timm.utils.model import reparameterize_model
model = reparameterize_model(model)

In [130]:
import torch.onnx
torch.onnx.export(model,
                 torch.rand(1, 3, 224, 224, requires_grad=True),
                 "efficientvit_b0.r224_in1k.onnx",
                 export_params=True,
                 opset_version=16,
                 do_constant_folding=True,
                 input_names=['input'],
                 output_names=['output'], 
                 dynamic_axes={'input' : {0 : 'batch_size'},   
                               'output' : {0 : 'batch_size'}}
)

  _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
  _C._jit_pass_onnx_graph_shape_type_inference(


verbose: False, log level: Level.ERROR



  _C._jit_pass_onnx_graph_shape_type_inference(


## ONNX Inference

In [167]:
import numpy as np
import onnxruntime as ort
from PIL import Image
from urllib.request import urlopen

#define the priority order for the execution providers

# prefer CUDA Execution Provider over CPU Execution Provider
EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider', 'OpenVINOExecutionProvider']

# Load ONNX model
session = ort.InferenceSession("efficientvit_b0.r224_in1k.onnx", providers=EP_list)

session.set_providers(['CPUExecutionProvider'])

# Load an image
img = Image.open(urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
img = img.convert('RGB')
img = img.resize((224, 224))
img_np = np.array(img).astype(np.float32)

# Convert data to the shape the ONNX model expects
input_data = np.transpose(img_np, (2, 0, 1))  # Convert to (C, H, W)
input_data = np.expand_dims(input_data, axis=0)  # Add a batch dimension

input_data.shape

# Get input name from the model
input_name = session.get_inputs()[0].name



In [168]:
%%timeit
# Perform inference
output = session.run(None, {input_name: input_data})

3.62 ms ± 401 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [160]:
# Extract output data (assuming model has a single output)
output_data = output[0]

output_data.shape

torch.Size([1000])

## Visualize Graph

In [39]:
!pip install -Uq netron

In [40]:
import IPython

import netron
port = 6006
model_path = "efficientvit_b0.r224_in1k.onnx"
netron.start(model_path, 6006, browse=False)

IPython.display.IFrame(f"http://localhost:{port}", width=1000, height=1000)

Serving 'efficientvit_b0.r224_in1k.onnx' at http://localhost:6006


In [41]:
!pip install -Uq onnxsim

In [134]:
!onnxsim efficientvit_b0.r224_in1k.onnx efficientvit_b0.r224_in1k_simplified.onnx

[1;35mYour model contains "Tile" ops or/and "ConstantOfShape" ops. Folding these ops [0m
[1;35mcan make the simplified model much larger. If it is not expected, please specify[0m
[1;35m"--no-large-tensor" (which will lose some optimization chances)[0m
Simplifying[33m...[0m
Finish! Here is the difference:
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃[1m [0m[1m                 [0m[1m [0m┃[1m [0m[1mOriginal Model[0m[1m [0m┃[1m [0m[1mSimplified Model[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ Add               │ 21             │ 21               │
│ Cast              │ 20             │ [1;38;5;46m0               [0m │
│ Concat            │ 16             │ [1;38;5;46m12              [0m │
│ Constant          │ 239            │ [1;38;5;46m109             [0m │
│ ConstantOfShape   │ 4              │ [1;38;5;46m0               [0m │
│ Conv              │ 50             │ 50               │
│ Div               │ 9     

In [136]:
import netron
port = 6006
model_path = "efficientvit_b0.r224_in1k_simplified.onnx"
netron.start(model_path, 6009, browse=False)

IPython.display.IFrame(f"http://localhost:{port}", width=1000, height=1000)

Serving 'efficientvit_b0.r224_in1k_simplified.onnx' at http://localhost:6009


In [173]:
import numpy as np
import onnxruntime as ort
from PIL import Image
from urllib.request import urlopen

#define the priority order for the execution providers

# prefer CUDA Execution Provider over CPU Execution Provider
EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider', 'OpenVINOExecutionProvider']

# Load ONNX model
session = ort.InferenceSession("efficientvit_b0.r224_in1k_simplified.onnx", providers=EP_list)

session.set_providers(['CPUExecutionProvider'])

# Load an image
img = Image.open(urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
img = img.convert('RGB')
img = img.resize((224, 224))
img_np = np.array(img).astype(np.float32)

# Convert data to the shape the ONNX model expects
input_data = np.transpose(img_np, (2, 0, 1))  # Convert to (C, H, W)
input_data = np.expand_dims(input_data, axis=0)  # Add a batch dimension

input_data.shape

# Get input name from the model
input_name = session.get_inputs()[0].name



In [174]:
%%timeit
# Perform inference
output = session.run(None, {input_name: input_data})

3.61 ms ± 286 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Quantize ONNX

In [139]:
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

model_fp32 = 'efficientvit_b0.r224_in1k_simplified.onnx'
model_quant = 'efficientvit_b0.r224_in1k_simplified_quantized.onnx'
quantized_model = quantize_dynamic(model_fp32, model_quant)


Ignore MatMul due to non constant B: /[/stages/stages.2/blocks/blocks.1/context_module/main/MatMul]
Ignore MatMul due to non constant B: /[/stages/stages.2/blocks/blocks.1/context_module/main/MatMul_1]
Ignore MatMul due to non constant B: /[/stages/stages.2/blocks/blocks.2/context_module/main/MatMul]
Ignore MatMul due to non constant B: /[/stages/stages.2/blocks/blocks.2/context_module/main/MatMul_1]
Ignore MatMul due to non constant B: /[/stages/stages.3/blocks/blocks.1/context_module/main/MatMul]
Ignore MatMul due to non constant B: /[/stages/stages.3/blocks/blocks.1/context_module/main/MatMul_1]
Ignore MatMul due to non constant B: /[/stages/stages.3/blocks/blocks.2/context_module/main/MatMul]
Ignore MatMul due to non constant B: /[/stages/stages.3/blocks/blocks.2/context_module/main/MatMul_1]


In [140]:
import numpy as np
import onnxruntime as ort
from PIL import Image
from urllib.request import urlopen

#define the priority order for the execution providers

# prefer CUDA Execution Provider over CPU Execution Provider
EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider', 'OpenVINOExecutionProvider']

# Load ONNX model
session = ort.InferenceSession("efficientvit_b0.r224_in1k_simplified_quantized_static.onnx", providers=EP_list)

# session.set_providers(['CPUExecutionProvider'])

# Load an image
img = Image.open(urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
img = img.convert('RGB')
img = img.resize((224, 224))
img_np = np.array(img).astype(np.float32)

# Convert data to the shape the ONNX model expects
input_data = np.transpose(img_np, (2, 0, 1))  # Convert to (C, H, W)
input_data = np.expand_dims(input_data, axis=0)  # Add a batch dimension

input_data.shape

# Get input name from the model
input_name = session.get_inputs()[0].name



In [141]:
%%timeit
# Perform inference
output = session.run(None, {input_name: input_data})

12.3 ms ± 2.55 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


## ONNX to OpenVINO

In [48]:
!pip install -Uq openvino

In [175]:
import openvino as ov
ov_model = ov.convert_model('efficientvit_b0.r224_in1k_simplified.onnx')

###### Option 1: Save to OpenVINO IR:

# save model to OpenVINO IR for later use
ov.save_model(ov_model, 'efficientvit_b0.r224_in1k_simplified.onnx.xml')

In [176]:
# Load an image
img = Image.open(urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
img = img.convert('RGB')
img = img.resize((224, 224))
img_np = np.array(img).astype(np.float32)

# Convert data to the shape the ONNX model expects
input_data = np.transpose(img_np, (2, 0, 1))  # Convert to (C, H, W)
input_data = np.expand_dims(input_data, axis=0)  # Add a batch dimension

In [177]:
input_data.shape

(1, 3, 224, 224)

In [178]:
###### Option 2: Compile and infer with OpenVINO:

# compile model
compiled_model = ov.compile_model(ov_model)

In [179]:
%%timeit
# run inference
result = compiled_model(input_data)

2.42 ms ± 242 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## PyTorch to OpenVINO

In [214]:
from urllib.request import urlopen
from PIL import Image
import timm
import torch

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model('efficientvit_b0.r224_in1k', pretrained=True)
model = model.eval()


In [215]:
import openvino as ov

# Create OpenVINO Core object instance
core = ov.Core()

# Convert model to openvino.runtime.Model object
ov_model = ov.convert_model(model)

MODEL_NAME = 'efficientvit_b0.r224_in1k'

# Save openvino.runtime.Model object on disk
ov.save_model(ov_model, f"{MODEL_NAME}_dynamic.xml")

# Load OpenVINO model on device
compiled_model = core.compile_model(ov_model, 'AUTO')

input_tensor=transforms(img).unsqueeze(0)


In [216]:
%%timeit
result = compiled_model(input_tensor)[0]

2.37 ms ± 128 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [217]:
result.shape

(1, 1000)

## PyTorch to Torchscript

In [85]:
import torch
from torch.utils.mobile_optimizer import optimize_for_mobile

model.eval()
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(model, example)
optimized_traced_model = optimize_for_mobile(traced_script_module)
optimized_traced_model._save_for_lite_interpreter("torchscript_efficientvit_b0.r224_in1k.pt")

In [96]:
import torch

# Step 1: Load the TorchScript model
model = torch.jit.load("torchscript_efficientvit_b0.r224_in1k.pt")

# Step 2: Prepare input data
# Assuming the model expects a 1D tensor of size 10 as input
# Load an image
img = Image.open(urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
img = img.convert('RGB')
img = img.resize((224, 224))
img_np = np.array(img).astype(np.float32)

# Convert data to the shape the ONNX model expects
input_data = np.transpose(img_np, (2, 0, 1))  # Convert to (C, H, W)
input_data = np.expand_dims(input_data, axis=0)  # Add a batch dimension


In [99]:
input_data.shape

(1, 3, 224, 224)

In [100]:
input_tensor = torch.tensor(input_data)

In [110]:
%%timeit

# Step 3: Run inference
with torch.no_grad():
    output = model(input_tensor)

1.8 s ± 136 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [111]:
# Step 4: Process output
# Convert to NumPy array or perform other operations
output_array = output.numpy()

In [112]:
output_array.shape

(1, 1000)

In [113]:
model

RecursiveScriptModule(original_name=EfficientVit)

## PyTorch to OpenVINO - torch.compile

In [153]:
import openvino.torch
model = torch.compile(model, backend='openvino')
# OR
# model = torch.compile(model, backend='openvino_ts')

In [154]:
model

OptimizedModule(
  (_orig_mod): EfficientVit(
    (stem): Stem(
      (in_conv): ConvNormAct(
        (dropout): Dropout(p=0.0, inplace=False)
        (conv): Conv2d(3, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Hardswish()
      )
      (res0): ResidualBlock(
        (pre_norm): Identity()
        (main): DSConv(
          (depth_conv): ConvNormAct(
            (dropout): Dropout(p=0.0, inplace=False)
            (conv): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
            (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act): Hardswish()
          )
          (point_conv): ConvNormAct(
            (dropout): Dropout(p=0.0, inplace=False)
            (conv): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm): BatchNorm2d(8, eps=1e-0

In [155]:
# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

In [157]:
%%timeit
output = model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

7.32 ms ± 455 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
