In [14]:
import onnx
import onnxruntime
import torch
from models.transformer_classifier import HateSpeechDetector


In [15]:
phobert_bi_clf = HateSpeechDetector.load_model(model_dir="models/phobert-binary/", num_labels=2)

### Dummy Input

In [16]:
text = "This is a sample input."
inputs = phobert_bi_clf.tokenizer(text, return_tensors="pt")

### Export to ONNX

In [17]:
torch.onnx.export(
    phobert_bi_clf.model,                                      # The PyTorch model
    (inputs["input_ids"], inputs["attention_mask"]),  # Model inputs
    "models/phobert-binary/onnx/phobert-binary-2.onnx",                               # Output file name
    input_names=["input_ids", "attention_mask"],  # Input tensor names
    output_names=["output"],                   # Output tensor name
    dynamic_axes={                             # Enable dynamic shapes
        "input_ids": {0: "batch_size", 1: "seq_len"},
        "attention_mask": {0: "batch_size", 1: "seq_len"},
        "output": {0: "batch_size", 1: "seq_len"}
    },
    half= True, 
    optimize =True,
    opset_version=14                           # ONNX opset version
)

TypeError: export() got an unexpected keyword argument 'half'

### Check Model

In [2]:
onnx_phobert = "models/phobert-binary/onnx/phobert-binary.onnx"

In [7]:
# Load and check the ONNX model
onnx_model = onnx.load(onnx_phobert)
onnx.checker.check_model(onnx_model)

# Use ONNX Runtime for inference
ort_session = onnxruntime.InferenceSession(onnx_phobert)

# Prepare inputs for ONNX Runtime
onnx_inputs = {
    "input_ids": inputs["input_ids"].numpy(),
    "attention_mask": inputs["attention_mask"].numpy(),
}

# Run inference
outputs = ort_session.run(None, onnx_inputs)
print("ONNX model output:", outputs)

ONNX model output: [array([[ 2.0608735, -1.8782197]], dtype=float32)]


In [13]:
import numpy as np
predictions = np.argmax(outputs[0], axis=1)
print(predictions)

[0]


### Quantization

In [22]:
from onnxruntime.quantization import quantize_dynamic, QuantType
 
# Đường dẫn đến mô hình gốc và mô hình sau khi quantize
model_input = "models/phobert-binary/onnx/phobert-binary.onnx"
model_output = "models/phobert-binary/onnx/phobert-binary-quantized.onnx"
 
quantize_dynamic(model_input, model_output, weight_type=QuantType.QUInt8)




In [21]:
model_output_2 = "models/phobert-binary/onnx/phobert-binary-quantized-int4.onnx"
 
quantize_dynamic(model_input, model_output, weight_type=QuantType.QUInt4)

