# Optimizing ONNX

In [1]:
!pip install onnxruntime onnx transformers optimum

Collecting onnxruntime
  Downloading onnxruntime-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)
Collecting onnx
  Downloading onnx-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
Collecting optimum
  Downloading optimum-1.9.1.tar.gz (252 kB)
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  

In [2]:
# Mount Google Drive inside the Colab runtime; the model paths used later live under this mount point
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [3]:
import numpy as np
import pandas as pd
import torch
from torch import optim
import torch.nn.functional as F

from transformers import BertForSequenceClassification, BertConfig, BertTokenizer

from pathlib import Path
import timeit
import onnxruntime as ort
from onnxruntime import InferenceSession
from onnxruntime.transformers.optimizer import optimize_model
from optimum.onnxruntime import ORTModelForSequenceClassification

The only difference from the non-optimized workflow is that we use the `optimize_model` function (from `onnxruntime.transformers.optimizer`) to optimize the computation graph inside the model.

In [9]:
# Locations on the mounted Drive: the model directory, the graph to read,
# and the file the optimized graph is written to
model_onnx_path = Path("/content/drive/MyDrive/Models/indobert-onnx")
source_onnx_path = str(model_onnx_path / "model.onnx")
optimized_onnx_path = str(model_onnx_path / "optimized.onnx")

# Only the model type and the CPU/GPU switch are set explicitly;
# every other option of the ORT transformer optimizer keeps its default value
optimizer_settings = {"model_type": "bert", "use_gpu": False}
optimized_model = optimize_model(input=source_onnx_path, **optimizer_settings)

# Write the optimized graph next to the original model file
optimized_model.save_model_to_file(optimized_onnx_path)

In [10]:
# Load the tokenizer from its directory on the mounted Drive
token_load_path = Path("/content/drive/MyDrive/Models/indobert")
tokenizer = BertTokenizer.from_pretrained(token_load_path)

# Create inputs for the model
text = ['Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita']

# padding=True pads every sentence to the longest one in the batch and
# return_tensors="np" makes the tokenizer return rectangular NumPy arrays.
# Without them the tokenizer returns plain Python lists, which become ragged
# (and cannot form a 2-D integer array) as soon as `text` holds several
# sentences of different token lengths. For a single sentence the result is
# unchanged.
inputs = tokenizer(text, padding=True, return_tensors="np")

# Keep the explicit int64 cast so the dtype fed to the ONNX session does not
# depend on the platform's default integer type
inputs_onnx = dict(
    input_ids=np.asarray(inputs["input_ids"]).astype("int64"),
    attention_mask=np.asarray(inputs["attention_mask"]).astype("int64"),
    token_type_ids=np.asarray(inputs["token_type_ids"]).astype("int64")
)

# Display the feed dictionary as the cell output
inputs_onnx

{'input_ids': array([[    2,  4771, 10413,   722,  3300,  3466, 19227,   457,    34,
          2176, 17377,   155,     3]]),
 'attention_mask': array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]),
 'token_type_ids': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [11]:
# Load the optimized graph into an ONNX Runtime session that is restricted to the CPU provider
cpu_only_providers = ["CPUExecutionProvider"]
opt_sess = InferenceSession(optimized_onnx_path, providers=cpu_only_providers)

In [12]:
# Class index -> sentiment name
i2w = {0: 'positive', 1: 'neutral', 2: 'negative'}

# Run the optimized graph; the first session output is used as the logits
session_outputs = opt_sess.run(None, input_feed=inputs_onnx)
logits = session_outputs[0]
logits_tensor = torch.from_numpy(logits)

# Top-1 class index, then the softmax probability assigned to that class.
# .squeeze().item() assumes a single sentence in the batch.
top_index = torch.topk(logits_tensor, k=1, dim=-1).indices
label = top_index.squeeze().item()
class_probabilities = F.softmax(logits_tensor, dim=-1).squeeze()
probability = class_probabilities[label].item()

print(f"Label: {i2w[label]}\nProbability: {probability}")

Label: positive
Probability: 0.9998039603233337


The optimized model produces the same result as the non-optimized one.