# Example of the Tensorflow to TensorRT converter

The Tensorflow network needs to be in float32 and the internal data format should be 'NCHW' ('channels_first').

In [1]:
import io
import os
import time
import tarfile
import requests

# use the first GPU in the system
# CUDA_DEVICE_ORDER=PCI_BUS_ID numbers the devices by PCI bus position and
# CUDA_VISIBLE_DEVICES=0 then exposes only the first of them. Both variables
# are set before tensorrt / tensorflow are imported below, so the selection
# is already in place when those libraries load.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import numpy as np
import tensorrt as trt
import tensorflow as tf

from trt_importer import TRTImporter




# variables
input_op = "input_tensor"        # name of the graph's input node
input_shape = [1, 224, 224, 3]   # shape of the tensor fed to `input_op`
output_op = "softmax_tensor"     # name of the graph's output node

# example data
# Seeded so every run feeds the same pseudo-random "image" with values in [0, 255].
# np.random.rand returns float64; the network is float32, so cast explicitly
# instead of relying on each consumer to convert the input implicitly.
np.random.seed(7)
np_input = (np.random.rand(*input_shape) * 255).astype(np.float32)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



## Download and extract a ResNet50 model from Tensorflow

In [2]:
# Fetch the model archive. A timeout prevents the cell from hanging forever on a
# stalled connection, and raise_for_status() stops on an HTTP error instead of
# handing an error page to tarfile.
response = requests.get(
    "http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NCHW.tar.gz",
    timeout=60,
)
response.raise_for_status()

# download and read the tar content
extract_root = os.path.realpath(".")
with tarfile.open(fileobj=io.BytesIO(response.content), mode='r:gz') as tar:
    for member in tar:
        print(member)
        # The archive comes from the network: refuse entries whose path would
        # resolve outside the working directory (absolute paths, "..").
        # NOTE(review): link targets inside the archive are not validated here.
        target = os.path.realpath(os.path.join(extract_root, member.name))
        if os.path.commonpath([extract_root, target]) != extract_root:
            raise RuntimeError("Unsafe path in archive: {}".format(member.name))
    tar.extractall(extract_root)

<TarInfo './resnet_v2_fp32_savedmodel_NCHW' at 0x7ff15c4d2a10>
<TarInfo './resnet_v2_fp32_savedmodel_NCHW/1538687196' at 0x7ff15c4d2bb0>
<TarInfo './resnet_v2_fp32_savedmodel_NCHW/1538687196/variables' at 0x7ff15c4d2ae0>
<TarInfo './resnet_v2_fp32_savedmodel_NCHW/1538687196/variables/variables.index' at 0x7ff15c4d2c80>
<TarInfo './resnet_v2_fp32_savedmodel_NCHW/1538687196/variables/variables.data-00000-of-00001' at 0x7ff15c4d2d50>
<TarInfo './resnet_v2_fp32_savedmodel_NCHW/1538687196/saved_model.pb' at 0x7ff15c4d2ef0>


## Load the saved model and convert it into a frozen graph

In [3]:
from tensorflow.python.tools import strip_unused_lib

# The TF1-style APIs are taken from tf.compat.v1 throughout this cell, matching
# the graph_util calls below and avoiding the deprecated top-level aliases.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory usage

with tf.compat.v1.Session(graph=tf.Graph(), config=config) as session:
    # load the SavedModel (tag "serve") into the session's fresh graph
    tf.compat.v1.saved_model.loader.load(session, ["serve"], "resnet_v2_fp32_savedmodel_NCHW/1538687196")
    print("Loaded saved model with ",len(session.graph_def.node),"nodes")

    # freeze the variables into constants and keep only the nodes needed for `output_op`
    output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(session, session.graph_def, [output_op])
    print("Converted to frozen graph with ",len(output_graph_def.node),"nodes")

    # replace the input node by a plain float32 placeholder and drop nodes that
    # are not required to compute the output
    output_graph_def = strip_unused_lib.strip_unused(output_graph_def,
                                                     input_node_names=[input_op],
                                                     output_node_names=[output_op],
                                                     placeholder_type_enum=tf.float32.as_datatype_enum)
    print("Strip unnessesary data. Resulting graph has",len(output_graph_def.node),"nodes.")

    # remove training and saver nodes
    output_graph_def = tf.compat.v1.graph_util.remove_training_nodes(output_graph_def)
    print("Final graph has",len(output_graph_def.node),"nodes, after removing training nodes")

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from resnet_v2_fp32_savedmodel_NCHW/1538687196/variables/variables
Loaded saved model with  1932 nodes
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 251 variables.
INFO:tensorflow:Converted 251 variables to const ops.
Converted to frozen graph with  701 nodes
Strip unnessesary data. Resulting graph has 701 nodes.
Instructions for updating:
Use `tf.compat.v1.graph_util.remove_training_nodes`
Final graph has 442 nodes, after removing training nodes


## Convert the Tensorflow network into a TensorRT engine

In [4]:
# Build the TensorRT network from the frozen graph definition, optimise it into
# a serialized engine and write that engine to disk.
importer = TRTImporter(trt.Logger.VERBOSE)

network = importer.from_tensorflow_graph_def(
    output_graph_def,
    [input_op],
    [input_shape],
    [output_op],
)

serialized_engine = importer.optimize_network(
    network,
    max_batch_size=1,
    max_workspace_size=4 << 30,  # 4 GiB, expressed in bytes
    fast_pass=True,
)

# kept as the cell's last expression so the return value of the store call is displayed
importer.store_engine(serialized_engine, "resnet_v2_fp32_savedmodel_NCHW.engine")

add input_tensor (1, 224, 224, 3)
add resnet_model/transpose/perm (4,)
add resnet_model/transpose (1, 3, 224, 224)
add resnet_model/Pad/paddings (4, 2)
add resnet_model/Pad (1, 3, 230, 230)
add resnet_model/conv2d/kernel (7, 7, 3, 64)
add resnet_model/conv2d/Conv2D (1, 64, 112, 112)
add resnet_model/max_pooling2d/MaxPool (1, 64, 56, 56)
add resnet_model/batch_normalization/gamma (64,)
add resnet_model/batch_normalization/beta (64,)
add resnet_model/batch_normalization/moving_mean (64,)
add resnet_model/batch_normalization/moving_variance (64,)
add resnet_model/batch_normalization/FusedBatchNorm (1, 64, 56, 56)
add resnet_model/Relu (1, 64, 56, 56)
add resnet_model/conv2d_1/kernel (1, 1, 64, 256)
add resnet_model/conv2d_1/Conv2D (1, 256, 56, 56)
add resnet_model/conv2d_2/kernel (1, 1, 64, 64)
add resnet_model/conv2d_2/Conv2D (1, 64, 56, 56)
add resnet_model/batch_normalization_1/gamma (64,)
add resnet_model/batch_normalization_1/beta (64,)
add resnet_model/batch_normalization_1/moving_m

add resnet_model/conv2d_27/Conv2D (1, 1024, 14, 14)
add resnet_model/add_7 (1, 1024, 14, 14)
add resnet_model/batch_normalization_24/gamma (1024,)
add resnet_model/batch_normalization_24/beta (1024,)
add resnet_model/batch_normalization_24/moving_mean (1024,)
add resnet_model/batch_normalization_24/moving_variance (1024,)
add resnet_model/batch_normalization_24/FusedBatchNorm (1, 1024, 14, 14)
add resnet_model/Relu_24 (1, 1024, 14, 14)
add resnet_model/conv2d_28/kernel (1, 1, 1024, 256)
add resnet_model/conv2d_28/Conv2D (1, 256, 14, 14)
add resnet_model/batch_normalization_25/gamma (256,)
add resnet_model/batch_normalization_25/beta (256,)
add resnet_model/batch_normalization_25/moving_mean (256,)
add resnet_model/batch_normalization_25/moving_variance (256,)
add resnet_model/batch_normalization_25/FusedBatchNorm (1, 256, 14, 14)
add resnet_model/Relu_25 (1, 256, 14, 14)
add resnet_model/conv2d_29/kernel (3, 3, 256, 256)
add resnet_model/conv2d_29/Conv2D (1, 256, 14, 14)
add resnet_mod

add resnet_model/conv2d_48/Conv2D (1, 512, 7, 7)
add resnet_model/batch_normalization_44/gamma (512,)
add resnet_model/batch_normalization_44/beta (512,)
add resnet_model/batch_normalization_44/moving_mean (512,)
add resnet_model/batch_normalization_44/moving_variance (512,)
add resnet_model/batch_normalization_44/FusedBatchNorm (1, 512, 7, 7)
add resnet_model/Relu_44 (1, 512, 7, 7)
add resnet_model/conv2d_49/kernel (1, 1, 512, 2048)
add resnet_model/conv2d_49/Conv2D (1, 2048, 7, 7)
add resnet_model/add_14 (1, 2048, 7, 7)
add resnet_model/batch_normalization_45/gamma (2048,)
add resnet_model/batch_normalization_45/beta (2048,)
add resnet_model/batch_normalization_45/moving_mean (2048,)
add resnet_model/batch_normalization_45/moving_variance (2048,)
add resnet_model/batch_normalization_45/FusedBatchNorm (1, 2048, 7, 7)
add resnet_model/Relu_45 (1, 2048, 7, 7)
add resnet_model/conv2d_50/kernel (1, 1, 2048, 512)
add resnet_model/conv2d_50/Conv2D (1, 512, 7, 7)
add resnet_model/batch_norma

True

## Load the engine and run an inference pass

In [5]:
# Start from a fresh importer and read the engine file written by the
# conversion step back from disk.
importer = TRTImporter(trt.Logger.VERBOSE)
serialized_engine = importer.load_engine("resnet_v2_fp32_savedmodel_NCHW.engine")

In [6]:
# Create the object used to run inference from the loaded engine.
inference_context = importer.inference_engine(serialized_engine)

# Show which tensors the context expects as input and which it produces as output.
print("\nBinding of input and output tensors")
print(inference_context.get_input_bindings())
print(inference_context.get_output_bindings())

Plugin after: 0 [msec]
Runtime after: 190 [msec]
Deserialize Cuda engine after: 1479 [msec]
Create execution context after: 1480 [msec]
Setup bindings after: 1481 [msec]
All shapes are known True
All dynamic shapes are known True

Binding of input and output tensors
{'input_tensor': Binding of input_tensor
Shape:(1, 224, 224, 3)
Host:[0. 0. 0. ... 0. 0. 0.]
Device:<pycuda._driver.DeviceAllocation object at 0x7ff0d84c9b70>
IsInput:True}
[Binding of softmax_tensor
Shape:(1, 1001)
Host:[0. 0. 0. ... 0. 0. 0.]
Device:<pycuda._driver.DeviceAllocation object at 0x7ff0d84c9710>
IsInput:False]


In [7]:
# Run the same input ten times and report the duration of each call.
# time.perf_counter() is used because it is a monotonic, high-resolution clock
# meant for measuring short intervals; time.time() can jump when the system
# clock is adjusted.
for i in range(10):
    start = time.perf_counter()
    output = inference_context.run(feed_dict={input_op: np_input})
    print('Inference time: {} [msec]'.format((time.perf_counter() - start)*1000))

# keep the result of the last run for the comparison with TensorFlow
trt_output = output[output_op]

Inference time: 3.821849822998047 [msec]
Inference time: 88.60540390014648 [msec]
Inference time: 3.4532546997070312 [msec]
Inference time: 3.397226333618164 [msec]
Inference time: 3.401517868041992 [msec]
Inference time: 3.381013870239258 [msec]
Inference time: 3.4008026123046875 [msec]
Inference time: 3.411531448364258 [msec]
Inference time: 3.4499168395996094 [msec]
Inference time: 3.4148693084716797 [msec]


In [8]:
# display the TensorRT result for the `output_op` tensor from the last timed run
trt_output

array([[5.1498625e-07, 1.7150540e-05, 1.7791535e-04, ..., 1.5762380e-06,
        5.4471198e-05, 3.7366131e-05]], dtype=float32)

In [9]:
# Drop the reference to the TensorRT inference context before running TensorFlow
# (presumably so the resources it holds can be released; TODO confirm).
del inference_context

## Run a simple test image in Tensorflow to get reference results

In [10]:
# create a new graph and use it as the default graph
graph = tf.Graph()
with graph.as_default():
    # import the frozen graph without a name prefix so the operations keep
    # their original names, and fetch the input / output operations
    nodes = tf.import_graph_def(output_graph_def, name='', return_elements=[input_op, output_op])
    tf_input_tensor = nodes[0].outputs[0]
    tf_output_tensor = nodes[1].outputs[0]

    print(tf_input_tensor)
    print(tf_output_tensor)

# tf.compat.v1.Session is the non-deprecated spelling of the TF1 session API
with tf.compat.v1.Session(graph=graph, config=config) as session:

    # time ten runs on the same input; perf_counter() is a monotonic,
    # high-resolution clock intended for measuring short intervals
    for i in range(10):
        start = time.perf_counter()
        tf_output = session.run(tf_output_tensor, feed_dict={tf_input_tensor: np_input})
        print('Inference time: {} [msec]'.format((time.perf_counter() - start)*1000))
    print("Output shape", tf_output.shape)

Tensor("input_tensor:0", dtype=float32)
Tensor("softmax_tensor:0", shape=(?, 1001), dtype=float32)
Inference time: 1433.9814186096191 [msec]
Inference time: 7.35020637512207 [msec]
Inference time: 7.376432418823242 [msec]
Inference time: 7.016897201538086 [msec]
Inference time: 7.330894470214844 [msec]
Inference time: 7.456779479980469 [msec]
Inference time: 7.67970085144043 [msec]
Inference time: 7.307291030883789 [msec]
Inference time: 7.283926010131836 [msec]
Inference time: 7.1563720703125 [msec]
Output shape (1, 1001)


## Compare the results 

In [11]:
# Sum of the element-wise absolute differences between the TensorFlow
# reference output and the TensorRT output.
abs_difference = np.abs(tf_output - trt_output)
print("difference between TensorRT and Tensorflow:", np.sum(abs_difference))

# show the TensorRT output once more for reference
print(trt_output)

difference between TensorRT and Tensorflow: 1.6834913e-06
[[5.1498625e-07 1.7150540e-05 1.7791535e-04 ... 1.5762380e-06
  5.4471198e-05 3.7366131e-05]]
