## Create a model and export it as ONNX

In [None]:
import os

import torch
from torchvision.models import resnet50, ResNet50_Weights

# Everything runs on the GPU; the batch size is reused for the ONNX export sample.
device = 'cuda'
batch_size = 1

# Build ResNet-50 with the IMAGENET1K_V2 pretrained weights, then move it to the device.
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
model = model.to(device)

After training, save the model weights so that the later sections can reload them:

In [None]:
# Persist only the weights (state_dict); later sections rebuild the architecture
# and load this checkpoint back in.
# Create the output directory first: torch.save does not create missing parent
# directories, so this cell would fail on a fresh checkout.
os.makedirs('./models', exist_ok=True)
torch.save(model.state_dict(), './models/cls.pt')

In [None]:
# Switch to inference mode before running the model: in training mode even a
# no_grad forward pass updates the BatchNorm running statistics, so the exported
# graph would no longer match the weights saved to ./models/cls.pt.
model.eval()

# Sample input; its shape (batch_size x 3 x 256 x 256) is what the export is traced with.
dummy_input = torch.randn(batch_size, 3, 256, 256).to(device)

# Sanity-check the forward pass and show the output shape.
with torch.no_grad():
    print(model(dummy_input).shape)

# Export the model to ONNX using the sample input.
torch.onnx.export(model, dummy_input, './models/cls.onnx')

## Convert the Torch model to Torch-TensorRT

In [None]:
# Restart the notebook kernel by terminating the current kernel process
# immediately (os._exit skips normal interpreter cleanup).
import os

os._exit(0)

In [None]:
import torch_tensorrt
from torchvision.models import resnet50, ResNet50_Weights
import torch

batch_size = 32  # batch size used as the optimisation target (opt_shape) below
device = 'cuda'

# load_state_dict (strict by default) replaces every parameter and buffer, so
# build the bare architecture instead of fetching pretrained weights that would
# be discarded immediately.
model = resnet50(weights=None).to(device)
# map_location keeps the load working regardless of the device the checkpoint
# tensors were saved from.
model.load_state_dict(torch.load('./models/cls.pt', map_location=device))

# Dynamic-batch input specification: batch sizes from 1 to 256 are accepted,
# with `batch_size` as the optimisation target. Inputs are declared as FP16.
inputs = [
    torch_tensorrt.Input(
        min_shape=[1, 3, 256, 256],
        opt_shape=[batch_size, 3, 256, 256],
        max_shape=[256, 3, 256, 256],
        dtype=torch.half,
    )
]

In [None]:
# Compile in inference mode so BatchNorm layers use their running statistics.
model.eval()

# Pin the TorchScript frontend explicitly: torch.jit.save below requires a
# ScriptModule, and newer Torch-TensorRT releases may select a different
# frontend by default.
trt_model = torch_tensorrt.compile(
    model,
    ir='ts',
    inputs=inputs,
    enabled_precisions={torch_tensorrt.dtype.half},
)

# Serialise the compiled module so the benchmark section can load it from disk.
torch.jit.save(trt_model, 'models/cls_torchtrt.ts')

## Convert the ONNX model to a TensorRT engine

In [None]:
# Restart the notebook kernel by terminating the current kernel process
# immediately (os._exit skips normal interpreter cleanup).
import os

os._exit(0)

- Convert to an FP32 engine

In [None]:
!trtexec --onnx=./models/cls.onnx --saveEngine=./models/cls_32.engine 

- Convert to an FP16 engine

In [None]:
!trtexec --onnx=./models/cls.onnx --saveEngine=./models/cls_16.engine --fp16 

## Benchmark

In [None]:
from benchmark import NativeTorchBenchmark, TensorRTBehcnmark, TorchScriptBenchmark, TorchTensorRTBenchmark
import numpy as np
from torchvision.models import resnet50, ResNet50_Weights
import matplotlib.pyplot as plt
from skimage.transform import resize

# Shared benchmark settings, passed unchanged to every benchmark below.
n_infers = 100
batch_size = 1
SEED = 0  # fixed seed so every run benchmarks the same input sample

# Seeded generator: the random sample is identical across kernel restarts,
# which keeps the benchmark input reproducible.
rng = np.random.default_rng(SEED)
input_image = rng.normal(size=[batch_size, 3, 256, 256])

- Native PyTorch

In [None]:
# Architecture instance handed to the benchmark; the checkpoint path is passed alongside it.
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

# Native (eager) PyTorch benchmark, using the shared settings and input sample.
pt_bm = NativeTorchBenchmark(
    n_infers=n_infers,
    batch_size=batch_size,
    samples=input_image,
    model_arch=model,
    model_ckpt='./models/cls.pt',
)

In [None]:
# Run the native PyTorch benchmark configured in the previous cell.
pt_bm.benchmark()

- TorchScript

In [None]:
# Fresh architecture instance for the TorchScript run; the checkpoint path is passed alongside it.
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

# TorchScript benchmark, using the shared settings and input sample.
ts_bm = TorchScriptBenchmark(
    n_infers=n_infers,
    batch_size=batch_size,
    samples=input_image,
    model_arch=model,
    model_ckpt='./models/cls.pt',
)

In [None]:
# Run the TorchScript benchmark configured in the previous cell.
ts_bm.benchmark()

- Torch-TensorRT (FP16)

In [None]:
# Torch-TensorRT benchmark: points at the compiled module saved as cls_torchtrt.ts.
ts_trt_bm = TorchTensorRTBenchmark(
    n_infers=n_infers,
    batch_size=batch_size,
    samples=input_image,
    model_path='./models/cls_torchtrt.ts',
)

In [None]:
# Run the Torch-TensorRT benchmark configured in the previous cell.
ts_trt_bm.benchmark()

- TensorRT FP32

In [None]:
# TensorRT benchmark against the FP32 engine file built with trtexec.
trt_bm_fp32 = TensorRTBehcnmark(
    n_infers=n_infers,
    batch_size=batch_size,
    samples=input_image,
    engine_path='./models/cls_32.engine',
)

In [None]:
# Run the TensorRT FP32 benchmark configured in the previous cell.
trt_bm_fp32.benchmark()

- TensorRT FP16

In [None]:
# TensorRT benchmark against the FP16 engine file built with trtexec --fp16.
trt_bm_fp16 = TensorRTBehcnmark(
    n_infers=n_infers,
    batch_size=batch_size,
    samples=input_image,
    engine_path='./models/cls_16.engine',
)

In [None]:
# Run the TensorRT FP16 benchmark configured in the previous cell.
trt_bm_fp16.benchmark()