Adapted from the PyTorch tutorial on exporting a simple model to ONNX: https://pytorch.org/tutorials/beginner/onnx/export_simple_model_to_onnx_tutorial.html

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class MyModel(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then three linear layers.

    ``fc1`` expects ``16 * 5 * 5`` flattened features, which is what a
    single-channel 32x32 input produces after the two conv/pool stages.
    The final layer emits 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (5x5 kernels, no padding).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on ``x`` and return the raw output of ``fc3``."""
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        features = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2)
        # Keep the batch dimension, collapse everything else.
        hidden = features.flatten(start_dim=1)
        hidden = F.relu(self.fc1(hidden))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines where CUDA is not available instead of crashing on `.cuda()`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Put the module in eval mode before exporting it for inference.
torch_model = MyModel().to(device).eval()
# Random example input: one single-channel 32x32 image.
torch_input = torch.randn(1, 1, 32, 32).to(device)
# NOTE(review): newer PyTorch releases steer users towards
# torch.onnx.export(..., dynamo=True) instead of dynamo_export; confirm
# against the installed version before upgrading.
onnx_program = torch.onnx.dynamo_export(torch_model, torch_input)

  param_schemas = callee.param_schemas()
  param_schemas = callee.param_schemas()
  self.param_schema = self.onnxfunction.param_schemas()


In [2]:
import time

# CUDA kernels are launched asynchronously: without synchronizing, the clock
# would only measure how long it takes to *queue* the work, not to run it.
if torch_input.is_cuda:
    torch.cuda.synchronize()  # drain pending work before starting the clock
s = time.perf_counter()  # monotonic, high-resolution timer
with torch.no_grad():  # inference benchmark: skip autograd bookkeeping
    for _ in range(10000):
        torch_model(torch_input)
if torch_input.is_cuda:
    torch.cuda.synchronize()  # wait for queued kernels before stopping the clock
print(time.perf_counter() - s)
# original author's note: on cpu > 20 sec

0.9741103649139404


In [3]:
from pathlib import Path

# Make sure the target folder exists first; writing into a missing
# directory would otherwise fail on a fresh checkout.
Path("inference_optimization/onnx").mkdir(parents=True, exist_ok=True)
# Serialize the exported program to disk.
onnx_program.save("inference_optimization/onnx/my_image_classifier.onnx")

In [4]:
import onnx

# Read the exported file back from disk and run ONNX's model checker on it,
# so a broken export is noticed here rather than at inference time.
onnx_model = onnx.load("inference_optimization/onnx/my_image_classifier.onnx")
onnx.checker.check_model(onnx_model)

In [None]:
import onnxruntime

# https://github.com/microsoft/onnxruntime/issues/21684#issuecomment-2300623788
# pip install onnxruntime-gpu==1.19.0


# One list entry per model input; here that is just the example tensor.
onnx_input = [torch_input]
print(f"Input length: {len(onnx_input)}", f"Sample input: {onnx_input}", sep="\n")

# Open the saved model with ONNX Runtime, requesting the CUDA execution
# provider (use "CPUExecutionProvider" in the list instead to run on CPU).
ort_session = onnxruntime.InferenceSession(
    "inference_optimization/onnx/my_image_classifier.onnx",
    providers=["CUDAExecutionProvider"],
)



def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the host.

    Tensors that track gradients are detached first, because ``.numpy()``
    cannot be called on a tensor that requires grad.
    """
    source = tensor.detach() if tensor.requires_grad else tensor
    return source.cpu().numpy()



# Build the feed dict: each graph input name, in order, is paired with the
# matching example tensor converted to a NumPy array.
onnxruntime_input = dict(
    zip(
        (graph_input.name for graph_input in ort_session.get_inputs()),
        map(to_numpy, onnx_input),
    )
)

# run() returns a list of outputs; passing None as the first argument
# requests every output of the graph.
onnxruntime_outputs = ort_session.run(None, onnxruntime_input)
onnxruntime_outputs

Input length: 1
Sample input: [tensor([[[[-0.8897, -2.0971,  1.5023,  ...,  1.0096, -0.3678, -1.0292],
          [ 0.6269,  2.7181,  0.0274,  ...,  0.5774, -1.2199,  0.8947],
          [-0.1511, -0.9477, -0.4847,  ..., -0.2903,  0.2606,  1.7323],
          ...,
          [ 1.4870,  1.1469,  0.0177,  ...,  0.6353, -0.6893,  0.1831],
          [-1.9934,  0.2834,  1.1603,  ...,  2.8390,  0.5041,  0.6823],
          [ 1.1443,  0.3378, -0.6748,  ...,  0.8843,  1.1407, -0.0104]]]],
       device='cuda:0')]


[array([[ 0.06790137,  0.0826061 , -0.04508397, -0.08962326, -0.06593737,
         -0.15124664,  0.07396881, -0.0775542 , -0.06990346, -0.07957613]],
       dtype=float32)]

In [None]:
import time

# Long-running loop: gives time to watch GPU utilization while it executes.
started_at = time.time()
for _ in range(1_000_000):
    ort_session.run(None, onnxruntime_input)
print(time.time() - started_at)

In [6]:
torch_outputs = torch_model(torch_input)

# The model returns a single tensor, while ONNX Runtime returns a list with
# one array per graph output. Wrap the tensor so that zip() pairs whole
# outputs; iterating the tensor directly would pair one *row* of shape (10,)
# with the full (1, 10) array and fail the shape check.
torch_output_list = (
    [torch_outputs] if isinstance(torch_outputs, torch.Tensor) else list(torch_outputs)
)

assert len(torch_output_list) == len(onnxruntime_outputs)
for torch_output, onnxruntime_output in zip(torch_output_list, onnxruntime_outputs):
    # assert_close also compares devices, so bring the (possibly CUDA,
    # grad-tracking) PyTorch result to the CPU before comparing it with the
    # host-side ONNX Runtime array.
    torch.testing.assert_close(
        torch_output.detach().cpu(), torch.tensor(onnxruntime_output)
    )

print("PyTorch and ONNX Runtime output matched!")
print(f"Output length: {len(onnxruntime_outputs)}")
print(f"Sample output: {onnxruntime_outputs}")

PyTorch and ONNX Runtime output matched!
Output length: 1
Sample output: [[ 0.05567859 -0.14844745  0.01186287  0.05035208 -0.08846587  0.03980439
   0.10298605  0.06890388 -0.05330245  0.00457602]]
