In [None]:
from torch2trt import *
from torch2trt.module_test import ModuleTest, MODULE_TESTS
import time
import argparse
import re
import runpy
from termcolor import colored

In [None]:
def _as_output_tuple(outputs):
    """Return ``outputs`` as a tuple, wrapping a bare (non tuple/list) value."""
    if isinstance(outputs, (tuple, list)):
        return tuple(outputs)
    return (outputs, )


def _measure_fps(fn, inputs, iterations):
    """Return calls per second for ``iterations`` calls queued back to back.

    The CUDA stream is synchronized only before and after the whole batch of
    calls, so this measures throughput rather than per-call latency.
    """
    torch.cuda.current_stream().synchronize()
    start = time.perf_counter()
    for _ in range(iterations):
        fn(*inputs)
    torch.cuda.current_stream().synchronize()
    return float(iterations) / (time.perf_counter() - start)


def _measure_latency_ms(fn, inputs, iterations):
    """Return the mean time per call in milliseconds.

    The CUDA stream is synchronized after every call, so each call has to
    finish before the next one is issued.
    """
    torch.cuda.current_stream().synchronize()
    start = time.perf_counter()
    for _ in range(iterations):
        fn(*inputs)
        torch.cuda.current_stream().synchronize()
    return 1000.0 * (time.perf_counter() - start) / float(iterations)


def run(self):
    """Convert one test module with torch2trt, check accuracy and benchmark it.

    Args:
        self: test description object. The attributes read here are
            ``module_fn`` (callable building the module), ``device``,
            ``dtype``, ``input_shapes`` (iterable of shapes, one per input)
            and ``torch2trt_kwargs`` (extra keyword arguments for torch2trt).

    Returns:
        Tuple ``(max_error, fps, fps_trt, ms, ms_trt)``:
        ``max_error`` is the largest per-output error (number of mismatching
        elements for bool outputs, max absolute difference otherwise; it is
        the int 0 or a 0-dim tensor), ``fps``/``fps_trt`` are the PyTorch and
        TensorRT throughputs in calls per second, ``ms``/``ms_trt`` the mean
        per-call latencies in milliseconds.
    """
    num_iterations = 50

    # create module
    module = self.module_fn()
    module = module.to(self.device)
    module = module.type(self.dtype)
    module = module.eval()

    # create inputs for conversion
    inputs_conversion = tuple(
        torch.zeros(shape).to(self.device).type(self.dtype)
        for shape in self.input_shapes
    )

    # convert module
    module_trt = torch2trt(module, inputs_conversion, max_workspace_size=1 << 20, **self.torch2trt_kwargs)

    # create inputs for torch/trt.. copy of inputs to handle inplace ops
    inputs = tuple(
        torch.randn(shape).to(self.device).type(self.dtype)
        for shape in self.input_shapes
    )
    inputs_trt = tuple(tensor.clone() for tensor in inputs)

    # test output against original
    # Both results are normalized to tuples: if only the reference output were
    # wrapped, indexing a bare converted output with [i] would slice its first
    # dimension instead of selecting the i-th output.
    outputs = _as_output_tuple(module(*inputs))
    outputs_trt = _as_output_tuple(module_trt(*inputs_trt))

    # compute max error
    max_error = 0
    for i in range(len(outputs)):
        if outputs[i].dtype == torch.bool:
            # bool tensors cannot be subtracted; count the mismatching elements
            max_error_i = torch.sum(outputs[i] ^ outputs_trt[i])
        else:
            max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i]))

        if max_error_i > max_error:
            max_error = max_error_i

    # benchmark throughput (pytorch, then tensorrt)
    fps = _measure_fps(module, inputs, num_iterations)
    fps_trt = _measure_fps(module_trt, inputs, num_iterations)

    # benchmark latency (pytorch, then tensorrt)
    ms = _measure_latency_ms(module, inputs, num_iterations)
    ms_trt = _measure_latency_ms(module_trt, inputs, num_iterations)

    return max_error, fps, fps_trt, ms, ms_trt

In [None]:
# Run every registered module test whose name matches NAME_FILTER, print one
# markdown-table row per test, then print summary counts.
NAME_FILTER = "interpolate"
# Rows whose max error exceeds TOLERANCE are highlighted and counted in
# NUM_ABOVE_TOLERANCE; a negative value disables the check.
TOLERANCE = -1.0

num_tests, num_success, num_tolerance, num_error = 0, 0, 0, 0
for test in MODULE_TESTS:

    # filter by module name
    name = test.module_name()
    if not re.search(NAME_FILTER, name):
        continue

    num_tests += 1

    # columns shared by the success row and the failure row
    row_prefix = '| %s | %s | %s | %s |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs))

    # run test
    try:
        max_error, fps, fps_trt, ms, ms_trt = run(test)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the
        # loop, and report why the test failed instead of hiding it.
        print(colored(row_prefix + ' N/A | N/A | N/A | N/A | N/A |', 'red'))
        print(colored('    %s: %s' % (type(exc).__name__, exc), 'red'))
        num_error += 1
        continue

    # write entry
    line = row_prefix + ' %.2E | %.3g | %.3g | %.3g | %.3g |' % (max_error, fps, fps_trt, ms, ms_trt)
    if TOLERANCE >= 0 and max_error > TOLERANCE:
        print(colored(line, 'yellow'))
        num_tolerance += 1
    else:
        print(line)
    num_success += 1

print('NUM_TESTS: %d' % num_tests)
print('NUM_SUCCESSFUL_CONVERSION: %d' % num_success)
print('NUM_FAILED_CONVERSION: %d' % num_error)
print('NUM_ABOVE_TOLERANCE: %d' % num_tolerance)

In [1]:
import cv2
import numpy as np
import torch
from torch2trt import torch2trt
from torchvision.models.alexnet import alexnet



In [None]:
# Convert AlexNet with torch2trt using a real image as the example input and
# print the largest absolute difference between PyTorch and TensorRT outputs.
model = alexnet(pretrained=True).eval().cuda()
filename = "assets/test_image.jpg"
image_cv = cv2.imread(filename)
# cv2.imread reports an unreadable/missing file by returning None instead of
# raising, which would otherwise surface as an obscure error inside cv2.resize.
if image_cv is None:
    raise FileNotFoundError("could not read image: %s" % filename)
image_cv = cv2.resize(image_cv, (224, 224))
image_cv = torch.from_numpy(image_cv).cuda().float()
# HWC -> CHW, then add a leading batch dimension
x = image_cv.permute(2, 0, 1).unsqueeze(0).contiguous()
model_trt = torch2trt(model, [x])
y = model(x)
y_trt = model_trt(x)
print(torch.max(torch.abs(y - y_trt)))

In [None]:
# Same comparison as the torch-permute variant, but the layout change
# (HWC -> NCHW) is done in numpy before the tensor is created.
# `filename` is the image path assigned in the preceding cell.
model = alexnet(pretrained=True).eval().cuda()
image_cv = cv2.imread(filename)
# cv2.imread reports an unreadable/missing file by returning None instead of
# raising, which would otherwise surface as an obscure error inside cv2.resize.
if image_cv is None:
    raise FileNotFoundError("could not read image: %s" % filename)
image_cv = cv2.resize(image_cv, (224, 224))
# HWC -> CHW, then add a leading batch dimension
image_cv = image_cv.transpose(2, 0, 1)
image_cv = np.expand_dims(image_cv, axis=0)
x = torch.from_numpy(image_cv).cuda().float().contiguous()
model_trt = torch2trt(model, [x])
y = model(x)
y_trt = model_trt(x)
print(torch.max(torch.abs(y - y_trt)))

In [3]:
# Baseline check on a synthetic all-ones batch: convert AlexNet and print the
# largest absolute element-wise gap between the PyTorch and TensorRT outputs.
model = alexnet(pretrained=True)
model = model.eval().cuda()
x = torch.ones((1, 3, 224, 224)).cuda()
model_trt = torch2trt(model, [x])
y, y_trt = model(x), model_trt(x)
print((y - y_trt).abs().max())

tensor(1.1921e-06, device='cuda:0', grad_fn=<MaxBackward1>)


In [7]:
import torch
import torch.nn as nn
import torchvision.models as models

# Build MobileNetV2 with pretrained=True; later cells inspect and modify it.
model = models.mobilenet_v2(pretrained=True)

In [8]:
# Show the module hierarchy of the loaded model (first conv is Conv2d(3, 32, ...)).
print(model)

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [9]:
# Swap the stem convolution for a newly constructed one that takes a single
# input channel; every other hyper-parameter matches the layer it replaces.
model.features[0][0] = nn.Conv2d(
    in_channels=1,
    out_channels=32,
    kernel_size=(3, 3),
    stride=(2, 2),
    padding=(1, 1),
    bias=False,
)

In [10]:
# Print the model again to confirm the first conv is now Conv2d(1, 32, ...).
print(model)

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr