### Read the input image

In [1]:
# import the needed libraries
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt # must import this although we will not use it explicitly
from tensorflow.python.platform import gfile
from PIL import Image
import numpy as np
import time
from matplotlib import pyplot as plt

# read the testing images (only for example)
img1= Image.open("dataset/mnist/testing/0/img_108.jpg")
img2= Image.open("dataset/mnist/testing/1/img_0.jpg")
img1 = np.asarray(img1)
img2 = np.asarray(img2)
input_img = np.concatenate((img1.reshape((1, 28, 28, 1)), 
                            img2.reshape((1, 28, 28, 1))), 
                           axis=0)

### Function to read ".pb" model (TensorRT model is stored in ".pb")

In [2]:
# function to read a ".pb" model 
# (can be used to read frozen model or TensorRT model)
def read_pb_graph(model):
  with gfile.FastGFile(model,'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
  return graph_def

### Perform inference using TensorRT model

In [3]:
# variable
TENSORRT_MODEL_PATH = './model/TensorRT_model.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))) as sess:
        # read TensorRT model
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(trt_graph, name='')
        input = sess.graph.get_tensor_by_name('input_tensor_input:0')
        output = sess.graph.get_tensor_by_name('output_tensor/Softmax:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred = sess.run(output, feed_dict={input: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_tensorRT = total_time / n_time_inference
        print("average inference time: ", avg_time_tensorRT)

Instructions for updating:
Use tf.gfile.GFile.
needed time in inference-0:  0.0007970333099365234
needed time in inference-1:  0.0008862018585205078
needed time in inference-2:  0.0005896091461181641
needed time in inference-3:  0.0007572174072265625
needed time in inference-4:  0.0006136894226074219
needed time in inference-5:  0.0008072853088378906
needed time in inference-6:  0.0006251335144042969
needed time in inference-7:  0.0009069442749023438
needed time in inference-8:  0.00069427490234375
needed time in inference-9:  0.0005767345428466797
needed time in inference-10:  0.0005242824554443359
needed time in inference-11:  0.0006775856018066406
needed time in inference-12:  0.0006916522979736328
needed time in inference-13:  0.0007081031799316406
needed time in inference-14:  0.0005643367767333984
needed time in inference-15:  0.0006608963012695312
needed time in inference-16:  0.00066375732421875
needed time in inference-17:  0.0005910396575927734
needed time in inference-18:  0

### Perform inference using the original tensorflow model

In [4]:
# variable
FROZEN_MODEL_PATH = './model/frozen_model.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        # read TensorRT model
        frozen_graph = read_pb_graph(FROZEN_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(frozen_graph, name='')
        input = sess.graph.get_tensor_by_name('input_tensor_input:0')
        output = sess.graph.get_tensor_by_name('output_tensor/Softmax:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred = sess.run(output, feed_dict={input: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_original_model = total_time / n_time_inference
        print("average inference time: ", avg_time_original_model)

needed time in inference-0:  0.0008623600006103516
needed time in inference-1:  0.0006482601165771484
needed time in inference-2:  0.0005645751953125
needed time in inference-3:  0.0006949901580810547
needed time in inference-4:  0.0006227493286132812
needed time in inference-5:  0.00063323974609375
needed time in inference-6:  0.0007140636444091797
needed time in inference-7:  0.000591278076171875
needed time in inference-8:  0.0006506443023681641
needed time in inference-9:  0.0006706714630126953
needed time in inference-10:  0.0006504058837890625
needed time in inference-11:  0.0006966590881347656
needed time in inference-12:  0.0006566047668457031
needed time in inference-13:  0.0007524490356445312
needed time in inference-14:  0.0005981922149658203
needed time in inference-15:  0.0011470317840576172
needed time in inference-16:  0.001070261001586914
needed time in inference-17:  0.0007946491241455078
needed time in inference-18:  0.0007913112640380859
needed time in inference-19: 

### Plot the prediction result

In [None]:
# plot the prediction output
plt.figure('img 1')
plt.imshow(img1, cmap='gray')
plt.title('pred:' + str(np.argmax(out_pred[0])), fontsize=22)

plt.figure('img 2')
plt.imshow(img2, cmap='gray')
plt.title('pred:' + str(np.argmax(out_pred[1])), fontsize=22)
plt.show()

In [2]:
# import the needed libraries
import tensorflow as tf
from tensorflow.python.platform import gfile
from PIL import Image
import numpy as np
import time
from matplotlib import pyplot as plt

# variable
TENSORRT_MODEL_PATH = './model/TensorRT_model.pb'

# read the testing images (only for example)
img1= Image.open("dataset/mnist/testing/0/img_108.jpg")
img2= Image.open("dataset/mnist/testing/1/img_0.jpg")
img1 = np.asarray(img1)
img2 = np.asarray(img2)
input_img = np.concatenate((img1.reshape((1, 28, 28, 1)), img2.reshape((1, 28, 28, 1))), axis=0)


# function to read a ".pb" model (can be used to read frozen model or TensorRT model)
def read_pb_graph(model):
  with gfile.FastGFile(model,'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
  return graph_def


graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        # read TensorRT model
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(trt_graph, name='')
        input = sess.graph.get_tensor_by_name('input_tensor_input:0')
        output = sess.graph.get_tensor_by_name('output_tensor/Softmax:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        print("average inference time: ", total_time / n_time_inference)

        # plot the prediction output
        plt.figure('img 1')
        plt.imshow(img1, cmap='gray')
        plt.title('pred:' + str(np.argmax(out_pred[0])), fontsize=22)

        plt.figure('img 2')
        plt.imshow(img2, cmap='gray')
        plt.title('pred:' + str(np.argmax(out_pred[1])), fontsize=22)
        plt.show()

NotFoundError: Op type not registered 'TRTEngineOp' in binary running on cvrc-H97-D3H. Make sure the Op and Kernel are registered in the binary running in this process. Note that if you are loading a saved graph which used ops from tf.contrib, accessing (e.g.) `tf.contrib.resampler` should be done before importing the graph, as contrib ops are lazily registered when the module is first accessed.