In [1]:
from urllib.request import urlopen
from PIL import Image
import timm
import torch

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model('efficientvit_b0.r224_in1k', pretrained=True)
model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# quantize model
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

In [3]:
quantized_model

EfficientVit(
  (stem): Stem(
    (in_conv): ConvNormAct(
      (dropout): Dropout(p=0.0, inplace=False)
      (conv): Conv2d(3, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): Hardswish()
    )
    (res0): ResidualBlock(
      (pre_norm): Identity()
      (main): DSConv(
        (depth_conv): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
          (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Hardswish()
        )
        (point_conv): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act):

In [10]:
%%timeit
output = model(transforms(img).unsqueeze(0))  

11.9 ms ± 305 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
%%timeit
output = quantized_model(transforms(img).unsqueeze(0))  

13.8 ms ± 474 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
import os 

def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/(1024*1024))
    os.remove('temp.p')

print_size_of_model(model)
print_size_of_model(quantized_model)

Size (MB): 13.097017288208008
Size (MB): 5.68641471862793


In [17]:
import torch.onnx
torch.onnx.export(quantized_model,
                 torch.rand(1, 3, 224, 224, requires_grad=True),
                 "efficientvit_b0.r224_in1k_pytorch_quantized.onnx",
                 export_params=True,
                 opset_version=16,
                 do_constant_folding=True,
                 input_names=['input'],
                 output_names=['output'], 
                 dynamic_axes={'input' : {0 : 'batch_size'},   
                               'output' : {0 : 'batch_size'}}
)

verbose: False, log level: Level.ERROR
ERROR: missing-standard-symbolic-function
Exporting the operator 'quantized::linear_dynamic' to ONNX opset version 16 is not supported. Please feel free to request support or submit a pull request on PyTorch GitHub: https://github.com/pytorch/pytorch/issues.
None
<Set verbose=True to see more details>




UnsupportedOperatorError: Exporting the operator 'quantized::linear_dynamic' to ONNX opset version 16 is not supported. Please feel free to request support or submit a pull request on PyTorch GitHub: https://github.com/pytorch/pytorch/issues.