In [None]:
!pip list

Package                            Version
---------------------------------- -------------------
absl-py                            1.4.0
accelerate                         0.34.2
aiohappyeyeballs                   2.4.3
aiohttp                            3.10.8
aiosignal                          1.3.1
alabaster                          0.7.16
albucore                           0.0.16
albumentations                     1.4.15
altair                             4.2.2
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array_record                       0.5.1
arviz                              0.19.0
astropy                            6.1.4
astropy-iers-data                  0.2024.9.30.0.32.59
astunparse                         1.6.3
async-timeout                      4.0.3
atpublic                           4.1.0
attrs                              24.2.0
audioread         

## Download TensorRT library

In [2]:
import torch
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [None]:
!python3 -m pip install --upgrade tensorrt



In [None]:
!pip list | grep tensorrt

tensorrt                           10.5.0
tensorrt-cu12                      10.5.0
tensorrt-cu12-bindings             10.5.0
tensorrt-cu12-libs                 10.5.0


In [1]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.17.0


In [3]:
# Define file paths
PYTORCH_MODEL_PATH = "sentiment_model.pt"
ONNX_MODEL_PATH = "sentiment_model.onnx"
TRT_FP32_PATH = "sentiment_model_fp32.plan"
TRT_FP16_PATH = "sentiment_model_fp16.plan"

# 1. Save PyTorch model (.pt)
def save_pytorch_model(model):
    torch.save(model.state_dict(), PYTORCH_MODEL_PATH)
    print(f"PyTorch model saved at {PYTORCH_MODEL_PATH}")

# 2. Convert to ONNX (.onnx)
def convert_to_onnx(model, tokenizer):
    model.eval()
    # Example input to trace the model (dummy input)
    text = "The service was great!"
    inputs = tokenizer(text, return_tensors="pt")

    torch.onnx.export(
        model,
        (inputs['input_ids'], inputs['attention_mask']),  # model input tuple
        ONNX_MODEL_PATH,
        export_params=True,
        opset_version=14,
        input_names=['input_ids', 'attention_mask'],
        output_names=['output'],
        dynamic_axes={'input_ids': {0: 'batch_size'}, 'attention_mask': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
    )
    print(f"ONNX model saved at {ONNX_MODEL_PATH}")

# 3. Convert ONNX to TensorRT using trtexec
def convert_to_tensorrt(onnx_model_path, trt_fp32_path, trt_fp16_path):
    # Convert ONNX to TensorRT FP32
    fp32_cmd = f"trtexec --onnx={onnx_model_path} --saveEngine={trt_fp32_path}"
    os.system(fp32_cmd)
    print(f"TensorRT FP32 model saved at {trt_fp32_path}")

    # Convert ONNX to TensorRT FP16
    fp16_cmd = f"trtexec --onnx={onnx_model_path} --saveEngine={trt_fp16_path} --fp16"
    os.system(fp16_cmd)
    print(f"TensorRT FP16 model saved at {trt_fp16_path}")

# Main process
def main():
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
    model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")

    # Step 1: Save PyTorch Model
    save_pytorch_model(model)

    # Step 2: Convert to ONNX
    convert_to_onnx(model, tokenizer)

    # Step 3: Convert to TensorRT (FP32 and FP16)
    convert_to_tensorrt(ONNX_MODEL_PATH, TRT_FP32_PATH, TRT_FP16_PATH)

if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


PyTorch model saved at sentiment_model.pt
ONNX model saved at sentiment_model.onnx
TensorRT FP32 model saved at sentiment_model_fp32.plan
TensorRT FP16 model saved at sentiment_model_fp16.plan


## The .plan file (TensorRT optimized model file won't be generated if running NVIDIA-T4 GPU) as it doesnt support FP32 and FP16 precision type optimization. Here the .pt file is the ONNX file for model, hardware optimized.