## Load the pretrained model and export it to ONNX

In [1]:
import os
from optimum.onnxruntime import ORTModelForTokenClassification
# Folder named "model_onnx" under the current working directory; the exported
# ONNX model is saved here.
onnx_model_path = os.path.abspath("model_onnx")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the checkpoint by its model id and convert it to ONNX while loading
# (export=True), then persist the exported model to onnx_model_path so the
# later steps can read it back from disk.
onnx_model = ORTModelForTokenClassification.from_pretrained(
    "arnabdhar/xlm-roberta-base-wikineural",
    export=True,
)
onnx_model.save_pretrained(onnx_model_path)

Framework not specified. Using pt to export to ONNX.
Using the export variant default. Available variants are:
    - default: The default ONNX variant.
Using framework PyTorch: 2.1.1+cpu
Overriding 1 configuration item(s)
	- use_cache -> False


## Apply Graph Optimization

In [3]:
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import AutoOptimizationConfig
# Folder named "model_optimized" under the current working directory; the
# graph-optimized model is saved here.
optimized_model_path = os.path.abspath("model_optimized")

In [4]:
# Build the optimizer from the ONNX export previously saved on disk.
optimizer = ORTOptimizer.from_pretrained(onnx_model_path)

# O2 preset with for_gpu=False and fp16=False.
# NOTE(review): presumably a CPU, full-precision setup -- confirm the exact
# meaning of the O2 level against the Optimum documentation.
graph_optimization_config = AutoOptimizationConfig.O2(
    for_gpu=False,
    fp16=False,
)



In [5]:
# Run graph optimization and write the result (model + ort_config.json) to
# optimized_model_path. Kept as the cell's last expression so the returned
# save path is displayed.
optimizer.optimize(
    save_dir=optimized_model_path,
    optimization_config=graph_optimization_config,
)

Optimizing model...
Configuration saved in d:\transformer-model-optimization\model_optimized\ort_config.json
Optimized model saved at: d:\transformer-model-optimization\model_optimized (external data format: False; saved all tensor to one file: True)


WindowsPath('d:/transformer-model-optimization/model_optimized')

## Apply Dynamic Quantization

In [6]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
# Folder named "model_quantized" under the current working directory; the
# quantized model is saved here.
quantized_model_path = os.path.abspath("model_quantized")

In [7]:
# Dynamic (is_static=False), per-tensor (per_channel=False) quantization
# config from the AVX2 preset.
dq_config = AutoQuantizationConfig.avx2(
    is_static=False,
    per_channel=False,
)

# NOTE(review): the quantizer is created from `onnx_model` (the plain ONNX
# export), not from the graph-optimized model saved in optimized_model_path.
# Confirm that quantizing the unoptimized export is intended.
quantizer = ORTQuantizer.from_pretrained(onnx_model)

In [8]:
# Apply the quantization config and write the quantized model (plus
# ort_config.json) to quantized_model_path. Kept as the cell's last
# expression so the returned save path is displayed.
quantizer.quantize(
    save_dir=quantized_model_path,
    quantization_config=dq_config,
)

Creating dynamic quantizer: QOperator (mode: IntegerOps, schema: u8/u8, channel-wise: False)
Quantizing model...
Saving quantized model at: d:\transformer-model-optimization\model_quantized (external data format: False)
Configuration saved in d:\transformer-model-optimization\model_quantized\ort_config.json


WindowsPath('d:/transformer-model-optimization/model_quantized')