In [None]:
import onnx
import numpy as np
import tvm
from tvm import te
import tvm.relay as relay
from tvm.contrib.download import download_testdata
import os
import PIL.Image as pil
import networks

# PyTorch imports
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms

In [None]:
class Depth(nn.Module):
    """Chain an encoder and a decoder into a single module.

    Args:
        encoder: callable applied to the input; its result is iterated and
            the items are passed to the decoder as positional arguments.
        decoder: callable receiving the encoder items positionally.
        output_list: when true, the decoder result is expected to expose
            ``.items()`` and ``forward`` returns its values as a list (in
            iteration order) instead of the raw decoder result.
    """

    def __init__(self, encoder, decoder, output_list=False):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.output_list = output_list

    def forward(self, inputs):
        """Run the encoder, feed its features to the decoder, return the result."""
        features = self.encoder(inputs)
        decoded = self.decoder(*features)
        if not self.output_list:
            return decoded
        # Flatten the keyed outputs to a plain list, preserving iteration order.
        return [tensor for _, tensor in decoded.items()]

## PyTorch model

In [None]:
# Locate the trained checkpoint folder holding the encoder and depth-decoder weights.
model_name = "resnet18"
load_weights_folder = os.path.join("/work", "garin0115", "models", model_name+"_256x832", "models", "weights_19")

encoder_path = os.path.join(load_weights_folder, "encoder.pth")
decoder_path = os.path.join(load_weights_folder, "depth.pth")
# map_location="cpu" lets checkpoints saved from a GPU load on a CPU-only host;
# load_state_dict copies the values into the (CPU) model parameters either way.
encoder_pth = torch.load(encoder_path, map_location="cpu")
decoder_pth = torch.load(decoder_path, map_location="cpu")
encoder = networks.ResnetEncoder(18, False)
decoder = networks.DepthDecoder(encoder.num_ch_enc)
# Keep only checkpoint entries the encoder actually owns. The state dict is
# fetched once instead of being rebuilt for every key in the comprehension.
encoder_keys = encoder.state_dict()
encoder.load_state_dict({k: v for k, v in encoder_pth.items() if k in encoder_keys})
decoder.load_state_dict(decoder_pth)
model = Depth(encoder, decoder, True)
# The model is only used for inference/export below, so switch it out of
# training mode right away.
model.eval()

In [None]:
# Trace the model with a random example input of the fixed network resolution.
input_shape = [1, 3, 256, 832]
input_data = torch.randn(input_shape)
# The model must be in eval mode *before* tracing: torch.jit.trace records
# mode-dependent operations as they behaved during tracing, and calling
# .eval() on the traced module afterwards does not change that.
scripted_model = torch.jit.trace(model.eval(), input_data).eval()

## Load input 

In [None]:
# Read the test image as RGB and remember its native size.
image_path = "assets/test_image.jpg"
input_image = pil.open(image_path).convert('RGB')
original_width, original_height = input_image.size
# Resample to 832x256 (width, height) with the Lanczos filter.
input_image_resized = input_image.resize(size=(832, 256), resample=pil.LANCZOS)
# Convert to a tensor and prepend a leading dimension with unsqueeze(0).
input_image_torch = (
    transforms.ToTensor()(input_image_resized)
    .unsqueeze(0)
)
# input_image_torch = input_image_torch.cuda()

In [None]:
# Import the traced module into Relay: one named input, shaped like the
# preprocessed image tensor.
input_name = 'input0'
shape_list = [(input_name, input_image_torch.shape)]
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

## ONNX model 

In [None]:
# Load the exported ONNX graph from the checkpoint folder.
model_path = os.path.join(
    "/work", "garin0115", "models", "resnet18_256x832",
    "models", "weights_19", "resnet18.onnx",
)
onnx_model = onnx.load(model_path)

# Compile for the host CPU; the commented lines show an Android/arm64 target.
# arch = "arm64"
# target =  "llvm -target=%s-linux-android" % arch
target = 'llvm'

# Dummy all-ones input; only its shape is needed for the import.
x = np.ones([1,3,256,832])
input_name = 'gemfield'
shape_dict = {input_name: x.shape}
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)

dtype = 'float32'
# The graph executor compiles the model when evaluate() is called, not when
# the executor object is created. Both steps therefore have to run inside the
# build_config scope for opt_level=1 to take effect.
with relay.build_config(opt_level=1):
    intrp = relay.build_module.create_executor('graph', sym, tvm.cpu(0), target)
    tvm_output = intrp.evaluate(sym)(tvm.nd.array(x.astype(dtype)), **params).asnumpy()

# Compile at opt_level=2. Note that `params` is rebound to the parameter
# dict returned by the build, and that is what gets saved below.
with relay.build_config(opt_level=2):
    graph, lib, params = relay.build_module.build(sym, target, params=params)

# Output artefacts: shared library, graph JSON and serialized parameters.
libpath = "gemfield.so"
graph_json_path = "gemfield.json"
param_path = "gemfield.params"

lib.export_library(libpath)

with open(graph_json_path, 'w') as fo:
    fo.write(graph)

with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))

# TRT

In [1]:
import torch
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import ImageDraw
import time
import sys, os
# Logger instance shared by the TensorRT builder, ONNX parser and runtime
# created in this notebook; configured at VERBOSE severity.
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)

In [2]:
def get_engine(onnx_file_path, engine_file_path=""):
    """Load a serialized TensorRT engine if available, otherwise build and save one.

    Args:
        onnx_file_path: path of the ONNX model used when an engine has to be built.
        engine_file_path: path of the serialized engine. If the file exists it
            is deserialized and returned; otherwise a freshly built engine is
            written there. An empty string (the default) disables caching.

    Returns:
        The TensorRT engine (the result of ``deserialize_cuda_engine`` or
        ``build_cuda_engine``).

    Raises:
        FileNotFoundError: no cached engine exists and the ONNX file is missing.
        RuntimeError: the ONNX file cannot be parsed or the engine build fails.
    """
    def build_engine():
        """Parse the ONNX file, build a TensorRT engine and optionally cache it."""
        # Fail fast, before any TensorRT object is created, and raise instead
        # of exit(0) so a notebook kernel is not terminated with a "success" code.
        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError(
                'ONNX file {} not found, please export the model to ONNX first.'.format(onnx_file_path))

        # Same value as the literal 1 used previously, spelled with the named flag.
        network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(network_creation_flag) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30 # 1GB
            builder.max_batch_size = 1

            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                # Parse exactly once: a second model.read() would return empty
                # bytes because the file handle is already at EOF.
                parsed = parser.parse(model.read())
            if not parsed:
                errors = [str(parser.get_error(index)) for index in range(parser.num_errors)]
                for message in errors:
                    print(message)
                raise RuntimeError(
                    'Failed to parse ONNX file {}: {} error(s) reported.'.format(onnx_file_path, len(errors)))

            # NOTE(review): the last layer's first output is always marked as a
            # network output, as before; presumably a workaround for graphs whose
            # outputs are not registered by the parser - confirm it is still needed.
            last_layer = network.get_layer(network.num_layers - 1)
            network.mark_output(last_layer.get_output(0))
            print('Completed parsing of ONNX file')

            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                # Without this check the failure surfaces as an AttributeError
                # on engine.serialize() below.
                raise RuntimeError(
                    'TensorRT failed to build an engine from {}.'.format(onnx_file_path))
            print("Completed creating Engine")

            # Only cache when a target path was given; open("") would fail.
            if engine_file_path:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()

In [3]:
# Paths for the TensorRT run: ONNX source model, cached engine file and test image.
_onnx_path_parts = (
    "/work", "garin0115", "models", "resnet18_256x832",
    "models", "weights_19", "resnet18.onnx",
)
onnx_file_path = os.path.join(*_onnx_path_parts)
engine_file_path = "resnet18.trt"
input_image_path = "./assets/test_image.jpg"

In [4]:
# Deserialize the engine from engine_file_path if that file exists,
# otherwise build it from the ONNX model.
engine = get_engine(onnx_file_path, engine_file_path)

Loading ONNX file from path /work/garin0115/models/resnet18_256x832/models/weights_19/resnet18.onnx...
Beginning ONNX file parsing
True
Completed parsing of ONNX file
Building an engine from file /work/garin0115/models/resnet18_256x832/models/weights_19/resnet18.onnx; this may take a while...
Completed creating Engine


In [None]:
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.

    Runs ``num_runs`` timed iterations of TensorRT inference followed by the
    PyTorch YOLO post-processing, printing the per-iteration timings and the
    averages over all iterations.

    NOTE(review): this function uses ``PreprocessYOLO``, ``common``,
    ``predict_transform`` and ``dynamic_write_results``, none of which are
    defined or imported in the visible part of this notebook - confirm they
    are available before running.
    """

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3-608.onnx'
    engine_file_path = "yolov3-608.trt"
    input_image_path = "./images/b.jpg"

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    input_resolution_yolov3_HW = (608, 608)

    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # Load an image from the specified input path, and return it together with  a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)

    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)]

    # Post-processing constants; they do not change between iterations, so
    # they are set up once instead of inside the timing loop.
    inp_dim = 608
    num_classes = 80
    CUDA = True
    yolo_anchors = [[(116, 90), (156, 198), (373, 326)],
                    [(30, 61),  (62, 45),   (59, 119)],
                    [(10, 13),  (16, 30),   (33, 23)]]

    # NOTE(review): unused afterwards; presumably kept to initialise PyTorch's
    # CUDA state before TensorRT runs - confirm before removing.
    a = torch.cuda.FloatTensor()

    # A single constant drives both the loop and the averages, so they cannot drift apart.
    num_runs = 10
    total_inference_time = 0
    total_yolo_time = 0
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for _ in range(num_runs):
            # Do inference
            print('Running inference on image {}...'.format(input_image_path))
            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            inference_start = time.time()
            inputs[0].host = image
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            inference_end = time.time()
            total_inference_time += inference_end - inference_start
            print('inference time : %f' % (inference_end-inference_start))

            # Do yolo_layer with pytorch
            yolo_start = time.time()
            # Reset per iteration so a run in which every output is skipped can
            # neither hit an unbound name nor reuse detections from an earlier run.
            detections = None
            for output, shape, anchors in zip(trt_outputs, output_shapes, yolo_anchors):
                output = output.reshape(shape)
                trt_output = torch.from_numpy(output).cuda()
                trt_output = trt_output.data
                trt_output = predict_transform(trt_output, inp_dim, anchors, num_classes, CUDA)

                # An int result is treated as "nothing to add" and skipped.
                if isinstance(trt_output, int):
                    continue

                if detections is None:
                    detections = trt_output
                else:
                    detections = torch.cat((detections, trt_output), 1)

            dets = None
            if detections is not None:
                dets = dynamic_write_results(detections, 0.5, num_classes, nms=True, nms_conf=0.45) #0.008
            yolo_end = time.time()
            total_yolo_time += yolo_end - yolo_start
            print('yolo time : %f' % (yolo_end-yolo_start))
            print('all time : %f' % (yolo_end-inference_start))

        average_yolo_time = total_yolo_time / num_runs
        average_inference_time = total_inference_time / num_runs
        print("--------------------------------------------------------")
        print('average yolo time : %f' % (average_yolo_time))
        print('average inference time : %f' % (average_inference_time))
        print("--------------------------------------------------------")

# Script-style entry point: call main() when this code runs as the main module.
# NOTE(review): main() references helpers (PreprocessYOLO, common,
# predict_transform, dynamic_write_results) that are not defined in the
# visible notebook - confirm they exist before executing this cell.
if __name__ == '__main__':
    main()