# Here, some models of various types are created.

# TorchScript model

In [None]:
from pathlib import Path

import torch
import torchvision.models as models
from torchvision.models import ResNet18_Weights

# Load the pre-trained ResNet-18 model with torchvision's default weights.
resnet18 = models.resnet18(weights=ResNet18_Weights.DEFAULT)
resnet18.eval()  # Set the model to evaluation mode before scripting

# Destination of the TorchScript file inside the triton_repository directory.
torchscript_path = Path("triton_repository/pytorch_model/model.pt")
# save() does not create missing parent directories, so make sure the target
# directory exists; otherwise this cell fails on a fresh checkout.
torchscript_path.parent.mkdir(parents=True, exist_ok=True)

# Compile the model to TorchScript and serialize it.
scripted_model = torch.jit.script(resnet18)
scripted_model.save(str(torchscript_path))

# ONNX model

In [58]:
import torch
import torchvision.models as models

# Load the pre-trained ResNet-18 model.
# The model is bound to `resnet18` rather than `ResNet18_Weights`: that name is
# the torchvision weights enum imported in the TorchScript cell, and rebinding
# it to a model instance would shadow the enum for the rest of the session.
# `weights=` is used instead of the deprecated `pretrained=True` flag, matching
# how the TorchScript cell loads the same network.
resnet18 = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
resnet18.eval()  # Set the model to evaluation mode

# Dummy input tensor for the ONNX export (batch size = 1, 3 color channels, 224x224 image)
dummy_input = torch.randn(1, 3, 224, 224)

# Path to save the ONNX model
onnx_model_path = "model.onnx"

# Export the model to ONNX
torch.onnx.export(
    resnet18,
    dummy_input,
    onnx_model_path,
    export_params=True,               # Store the trained parameters in the model file
    opset_version=11,                 # ONNX opset version
    do_constant_folding=True,         # Optimize constant folding for inference
    input_names=["input"],            # Input tensor name
    output_names=["output"],          # Output tensor name
    dynamic_axes={                    # Only the batch dimension (axis 0) is dynamic
        "input": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)

print(f"ResNet-18 ONNX model exported successfully to {onnx_model_path}")



# BERT model

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Hugging Face checkpoint used for both the tokenizer and the model.
model_name = "cointegrated/rubert-tiny2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE: as the warning in this cell's output reports, the checkpoint does not
# contain the classification head ('classifier.weight', 'classifier.bias'),
# so those weights are newly initialized when the model is loaded.
# eval() returns the module itself, so it can be chained onto the load call.
bert = AutoModelForSequenceClassification.from_pretrained(model_name).eval()

print("loaded")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cointegrated/rubert-tiny2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


loaded


In [4]:
# Tokenize a long dummy text into a single example for tracing.
# truncation=True makes max_length an actual upper bound on the sequence
# length; padding="max_length" on its own only pads inputs that are shorter.
encoded = tokenizer(
    "Привет, как дела?" * 100,
    return_tensors="pt",
    max_length=512,
    padding="max_length",
    truncation=True,
)

# torch.onnx.export passes this tuple to the model positionally, and
# `input_names` are assigned to the graph inputs in the same order. BERT's
# forward takes (input_ids, attention_mask, token_type_ids, ...), so the tuple
# and the names below must follow that order; otherwise the exported graph ends
# up with the attention mask and the token type ids swapped under each other's names.
dummy_input = (
    encoded["input_ids"],
    encoded["attention_mask"],
    encoded["token_type_ids"],
)

torch.onnx.export(
    bert,
    dummy_input,
    "bert.onnx",
    export_params=True,               # Store the trained parameters in the model file
    opset_version=17,                 # ONNX opset version
    do_constant_folding=True,         # Optimize constant folding for inference
    input_names=["input_ids", "attention_mask", "token_type_ids"],  # Input tensor names, in forward() order
    output_names=["output"],          # Output tensor name
    dynamic_axes={                    # Specify dynamic axes for batch size and sequence length
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "attention_mask": {0: "batch_size", 1: "sequence_length"},
        "token_type_ids": {0: "batch_size", 1: "sequence_length"},
        "output": {0: "batch_size"},
    },
)


## ONNX dynamic quantization

In [27]:
from onnxruntime.quantization import quantize_dynamic, QuantType

# Input: the float32 ONNX file written by the BERT export cell.
model_fp32 = "bert.onnx"
# Output: where the quantized model is written.
model_quant = "bert_int8.onnx"

# Run onnxruntime's dynamic quantization, storing the quantized weights
# (e.g., for Linear / MatMul operators) as unsigned 8-bit integers.
quantize_dynamic(
    model_input=model_fp32,
    model_output=model_quant,
    weight_type=QuantType.QUInt8,
)
print("Dynamic quantization complete.")



Dynamic quantization complete.
