# Doing inference on Jetson AGX by loading a .trt engine:

In [1]:
import torch
import torch.onnx
import sys
import progressbar
import time
import numpy as np
from skimage import io
from skimage.transform import resize
import tensorrt as trt
import os
import cv2
from torchvision import transforms
from PIL import Image
import pycuda.driver as cuda
import pycuda.autoinit

In [4]:
# Number of images per batch: the sample image is repeated this many times to
# build the test batch, and the host output buffer gets one row per batch item.
BATCH_SIZE = 1

In [5]:
# Precision switch: when enabled, host buffers use half precision (float16);
# otherwise they use single precision (float32).
USE_FP16 = True

if USE_FP16:
    target_dtype = np.float16
else:
    target_dtype = np.float32

To create a test batch, we repeat a single image `BATCH_SIZE` times. Note that the cell below loads a local image from `data_clean/` rather than an open-source dog image from http://www.dog.ceo:

In [6]:
# Path of the single sample image used to build the test batch.
url = 'data_clean/n01440764-ILSVRC2012_val_00009191.JPEG'

# Open the image and make sure it has exactly three RGB channels.
img = Image.open(url)
if img.mode != "RGB":
    img = img.convert("RGB")

# Convert to an array and rescale to 224x224 pixels.
img = resize(np.array(img), (224, 224))

# Cast to float32, add a leading batch axis, then repeat the image along that
# axis BATCH_SIZE times.
single_image = np.expand_dims(np.array(img, dtype=np.float32), axis=0)
input_batch = np.repeat(single_image, BATCH_SIZE, axis=0)

# Show the batch shape as the cell output.
input_batch.shape

(1, 224, 224, 3)

In [7]:
# Display the precision flag so the rendered notebook records which mode was used.
USE_FP16

True

In [8]:
# step out of Python for a moment to convert the ONNX model to a TRT engine using trtexec
# if USE_FP16:
#     !trtexec --onnx=resnet50_pytorch.onnx --saveEngine=resnet_engine_pytorch.trt  --explicitBatch --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw --fp16
# else:
#     !trtexec --onnx=resnet50_pytorch.onnx --saveEngine=resnet_engine_pytorch.trt  --explicitBatch

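If uncommented, this would save the model as 'resnet_engine_pytorch.trt' (the name passed to `--saveEngine`). Note that the cells below load a different engine file, 'mobile_gpu.trt'.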

In [9]:
# Load every decodable image from the data_clean directory, keeping the file
# name of each successfully loaded image at the same index in `names`.
images = []
names = []

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the order of the saved results) reproducible.
for filename in sorted(os.listdir('data_clean')):
    img = cv2.imread(os.path.join('data_clean', filename))
    # Skip entries that could not be read as an image (imread gave None).
    if img is not None:
        images.append(img)
        names.append(filename)

print("loaded {} images".format(len(images)))

# Read the class labels. Each row is split at its first space and only the
# part after it is kept as the human-readable label. The context manager
# closes the file handle, which the bare open() call never did.
with open('synset.txt') as synset_file:
    rows = synset_file.read().strip().split("\n")
classes = [r[r.find(" ") + 1:] for r in rows]

# Accumulator for (file name, top score, top label) tuples filled by the
# inference loop.
data = []

loaded 5000 images


In [10]:
# load serialized model and convert to deserialize model

%%time

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

f = open("mobile_gpu.trt", "rb")
runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING)) 

engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

CPU times: user 1.99 s, sys: 992 ms, total: 2.98 s
Wall time: 2.98 s


In [12]:
import numpy as np

# Host buffer receiving the network output: BATCH_SIZE rows of 1000 values,
# stored in target_dtype so that it matches the precision selected by USE_FP16.
# It is allocated as page-locked (pinned) memory because PyCUDA documents that
# the destination of memcpy_dtoh_async must be page-locked.
output = cuda.pagelocked_empty((BATCH_SIZE, 1000), dtype=target_dtype)

# allocate device memory
# The input buffer is sized from the float32 sample batch.
# NOTE(review): the inference loop uploads a tensor that presumably has the same
# element count at the engine's I/O precision, making this an upper bound - confirm.
d_input = cuda.mem_alloc(input_batch.nbytes)
d_output = cuda.mem_alloc(output.nbytes)

# Device pointers in the order passed to the execution context: input, output.
bindings = [int(d_input), int(d_output)]

# Stream on which the copies and the inference call are queued.
stream = cuda.Stream()

In [13]:
# Preprocessing pipeline applied to every PIL image before inference:
# resize, centre-crop to 224, convert to a tensor, then normalise each of the
# three channels with the given mean and standard deviation.
preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocess_steps)


In [14]:
# Classify every loaded image with the TensorRT engine and time the whole run.
# For each image the best-scoring class is recorded in `data` as a tuple
# (file name, top score, top label).

# Start from an empty result list so that re-running this cell does not append
# duplicates to results left over from a previous run.
data = []

start = time.time()
bar = progressbar.ProgressBar(maxval=len(images), widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
bar.start()

for i, name in enumerate(names):
    # Open inside a context manager so the image file handle is released
    # immediately; convert() yields an independent 3-channel RGB copy.
    with Image.open(os.path.join("data_clean", name)) as pil_img:
        img = pil_img.convert("RGB")

    img_t = transform(img)
    batch_t = torch.unsqueeze(img_t, 0)  # add the leading batch axis
    # Use the configured I/O precision (target_dtype) rather than a hard-coded
    # float16, so the input stays consistent with the output buffer's dtype.
    host_input = np.ascontiguousarray(batch_t.numpy(), dtype=target_dtype)

    # upload the input tensor
    cuda.memcpy_htod_async(d_input, host_input, stream)
    # execute model; a falsy return value means the work was not enqueued, in
    # which case `output` would still hold the previous image's scores
    if not context.execute_async_v2(bindings, stream.handle, None):
        raise RuntimeError("TensorRT inference failed for {}".format(name))
    # transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # synchronize: wait until everything queued on the stream has finished
    stream.synchronize()

    # indices of the five highest scores, best first
    indices = (-output[0]).argsort()[:5]
    # append inference output (top-1 only) to data list
    data.append((name, output[0][indices[0]], classes[indices[0]]))

    bar.update(i + 1)

bar.finish()
end = time.time()
print("classification took {:.5} seconds".format(end - start))




classification took 93.223 seconds


In [15]:
# store result in file
# Write through an explicit file handle instead of rebinding sys.stdout:
# replacing sys.stdout silences every later print in the kernel and leaves the
# file open (and possibly unflushed). The context manager closes it reliably.
# One tuple is written per line, in the same format print() used before.
with open('mobilegpu_clean.txt', 'wt') as results_file:
    for record in data:
        print(record, file=results_file)

### Convert PyTorch model to ONNX

In [None]:
import torch.onnx
import torchvision

In [None]:
# Export a pretrained torchvision MobileNetV2 to the file "mobilenet.onnx",
# using a random tensor of shape (1, 3, 224, 224) as the example input.
dummy_input = torch.randn((1, 3, 224, 224))
model = torchvision.models.mobilenet_v2(pretrained=True)
torch.onnx.export(model, (dummy_input,), "mobilenet.onnx")

In [None]:
import onnx
# Load an ONNX file from disk and run the ONNX checker on it.
# NOTE(review): this validates "inceptionv3.onnx", but the export cell above
# writes "mobilenet.onnx" - confirm which file is meant to be checked.
# NOTE(review): this rebinds `model`, which previously held the torchvision
# model, to the loaded ONNX model.
model = onnx.load("inceptionv3.onnx")
onnx.checker.check_model(model)