In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import onnx
import onnxruntime

# Checkpoint to export; "gpt2-medium", "gpt2-large" or "gpt2-xl" can be
# substituted here.
model_name = "gpt2"

# Pick CUDA when torch reports it as available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the pretrained weights. `.to()` and `.eval()` both
# return the module itself, so the model is moved to the chosen device and
# put into evaluation mode in one chained expression.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name).to(device).eval()

# Sample input: the seed string encoded as a PyTorch tensor of token ids,
# placed on the same device as the model.
seed_text = " "
input_ids = tokenizer.encode(seed_text, return_tensors="pt").to(device)

# Define the output path for the ONNX model
onnx_model_path = "gpt2.onnx"

# The export below declares exactly one output ("logits"). With GPT-2's
# default configuration the forward pass additionally returns the key/value
# cache and wraps everything in a ModelOutput, which leaves extra outputs in
# the graph that have no declared name or dynamic axes (and, depending on the
# installed transformers version, may not trace at all). Switch both
# behaviours off for the duration of the export so the traced forward returns
# a plain tuple whose only element is the logits tensor.
_saved_use_cache = model.config.use_cache
_saved_return_dict = model.config.return_dict
model.config.use_cache = False
model.config.return_dict = False

try:
    # Gradients are not needed for export; disable autograd while exporting.
    with torch.no_grad():
        # NOTE(review): opset 11 is kept from the original script; some
        # attention implementations may need a newer opset -- confirm against
        # the installed torch/transformers versions.
        torch.onnx.export(
            model,                                 # Model to export
            input_ids,                             # Sample input
            onnx_model_path,                       # Output file path
            export_params=True,                    # Store the trained parameters
            opset_version=11,                      # ONNX opset version
            do_constant_folding=True,              # Optimization flag
            input_names=["input_ids"],             # Model's input name
            output_names=["logits"],               # Model's output name
            dynamic_axes={                         # Axes left variable in the graph
                "input_ids": {0: "batch_size", 1: "sequence_length"},
                "logits": {0: "batch_size", 1: "sequence_length"},
            },
        )
finally:
    # Restore the original configuration so any later use of `model`
    # (e.g. generation with the KV cache) behaves as before the export.
    model.config.use_cache = _saved_use_cache
    model.config.return_dict = _saved_return_dict

print(f"Model exported to {onnx_model_path}")
