In [1]:
# import the needed libraries
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt # must import this although we will not use it explicitly
from tensorflow.python.platform import gfile
import numpy as np
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image


In [2]:
import os
files = os.listdir('chest_xray/test/NORMAL/')
img1 = image.load_img(r'chest_xray/test/NORMAL/' + files[0], target_size=(299, 299))
img_array1 = image.img_to_array(img1)
img_array_expanded_dims1 = np.expand_dims(img_array1, axis=0)
input_img = preprocess_input(img_array_expanded_dims1)

for i in files[1:64]:
    img2 = image.load_img('chest_xray/test/NORMAL/' + i, target_size=(299, 299))
    img_array2 = image.img_to_array(img2)
    img_array_expanded_dims2 = np.expand_dims(img_array2, axis=0)
    img2 = preprocess_input(img_array_expanded_dims2)

    input_img = np.concatenate((input_img, img2),axis=0)

In [3]:
input_img.shape

(64, 299, 299, 3)

In [4]:
# function to read a ".pb" model 
# (can be used to read frozen model or TensorRT model)
import time
def read_pb_graph(model):
  with gfile.FastGFile(model,'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
  return graph_def

In [5]:
# variable
TENSORRT_MODEL_PATH = 'chest_TensorRT_model_32.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))) as sess:
        # read TensorRT model
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(trt_graph, name='')
        input = sess.graph.get_tensor_by_name('input_1:0')
        output = sess.graph.get_tensor_by_name('dense_1/Sigmoid:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred32 = sess.run(output, feed_dict={input: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred_32 = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_tensorRT_32 = total_time / n_time_inference
        print("average inference time of fp32: ", avg_time_tensorRT_32)

W0916 10:30:03.613424 140074735155008 deprecation.py:323] From <ipython-input-4-a0f96aa37027>:5: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.gfile.GFile.


needed time in inference-0:  0.22695040702819824
needed time in inference-1:  0.26958417892456055
needed time in inference-2:  0.25417065620422363
needed time in inference-3:  0.2622084617614746
needed time in inference-4:  0.2608671188354492
needed time in inference-5:  0.26207971572875977
needed time in inference-6:  0.2536125183105469
needed time in inference-7:  0.25693774223327637
needed time in inference-8:  0.265289306640625
needed time in inference-9:  0.25855040550231934
needed time in inference-10:  0.2718379497528076
needed time in inference-11:  0.26901865005493164
needed time in inference-12:  0.27494359016418457
needed time in inference-13:  0.2692906856536865
needed time in inference-14:  0.267305850982666
needed time in inference-15:  0.2747161388397217
needed time in inference-16:  0.26824164390563965
needed time in inference-17:  0.2651848793029785
needed time in inference-18:  0.26915884017944336
needed time in inference-19:  0.26670217514038086
needed time in infere

In [6]:
# variable
TENSORRT_MODEL_PATH = 'chest_TensorRT_model_16.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))) as sess:
        # read TensorRT model
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(trt_graph, name='')
        input = sess.graph.get_tensor_by_name('input_1:0')
        output = sess.graph.get_tensor_by_name('dense_1/Sigmoid:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred16 = sess.run(output, feed_dict={input: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred_16 = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_tensorRT_16 = total_time / n_time_inference
        print("average inference time of fp16: ", avg_time_tensorRT_16)

needed time in inference-0:  0.17037320137023926
needed time in inference-1:  0.1736588478088379
needed time in inference-2:  0.16443347930908203
needed time in inference-3:  0.16753911972045898
needed time in inference-4:  0.1743161678314209
needed time in inference-5:  0.180647611618042
needed time in inference-6:  0.16978859901428223
needed time in inference-7:  0.16655707359313965
needed time in inference-8:  0.16333484649658203
needed time in inference-9:  0.16156363487243652
needed time in inference-10:  0.163771390914917
needed time in inference-11:  0.1631169319152832
needed time in inference-12:  0.16362881660461426
needed time in inference-13:  0.16409683227539062
needed time in inference-14:  0.1652052402496338
needed time in inference-15:  0.16817426681518555
needed time in inference-16:  0.16233420372009277
needed time in inference-17:  0.1789228916168213
needed time in inference-18:  0.17987799644470215
needed time in inference-19:  0.17692875862121582
needed time in infe

In [7]:
# variable
TENSORRT_MODEL_PATH = 'chest_TensorRT_model_88.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))) as sess:
        # read TensorRT model
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(trt_graph, name='')
        input = sess.graph.get_tensor_by_name('input_1:0')
        output = sess.graph.get_tensor_by_name('dense_1/Sigmoid:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred8 = sess.run(output, feed_dict={input: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_tensorRT_8 = total_time / n_time_inference
        print("average inference time of int8: ", avg_time_tensorRT_8)

needed time in inference-0:  0.1599578857421875
needed time in inference-1:  0.16600728034973145
needed time in inference-2:  0.1695709228515625
needed time in inference-3:  0.17739367485046387
needed time in inference-4:  0.17656874656677246
needed time in inference-5:  0.1762685775756836
needed time in inference-6:  0.17146801948547363
needed time in inference-7:  0.16624069213867188
needed time in inference-8:  0.16275525093078613
needed time in inference-9:  0.16216802597045898
needed time in inference-10:  0.1631777286529541
needed time in inference-11:  0.16693782806396484
needed time in inference-12:  0.16437029838562012
needed time in inference-13:  0.16263604164123535
needed time in inference-14:  0.16649293899536133
needed time in inference-15:  0.169142484664917
needed time in inference-16:  0.1664581298828125
needed time in inference-17:  0.16732025146484375
needed time in inference-18:  0.17411565780639648
needed time in inference-19:  0.17079830169677734
needed time in in

In [8]:
# variable
FROZEN_MODEL_PATH = 'chest_model.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        # read TensorRT model
        frozen_graph = read_pb_graph(FROZEN_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(frozen_graph, name='')
        input = sess.graph.get_tensor_by_name('input_1:0')
        output = sess.graph.get_tensor_by_name('dense_1/Sigmoid:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred = sess.run(output, feed_dict={input: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output, feed_dict={input: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_original_model = total_time / n_time_inference
        print("average inference time: ", avg_time_original_model)
        

needed time in inference-0:  0.884479284286499
needed time in inference-1:  0.32956910133361816
needed time in inference-2:  0.3315303325653076
needed time in inference-3:  0.3311593532562256
needed time in inference-4:  0.34201574325561523
needed time in inference-5:  0.3312687873840332
needed time in inference-6:  0.3297877311706543
needed time in inference-7:  0.32982468605041504
needed time in inference-8:  0.32972049713134766
needed time in inference-9:  0.3287022113800049
needed time in inference-10:  0.33045101165771484
needed time in inference-11:  0.33327817916870117
needed time in inference-12:  0.3340446949005127
needed time in inference-13:  0.3313601016998291
needed time in inference-14:  0.33005762100219727
needed time in inference-15:  0.33277034759521484
needed time in inference-16:  0.3278679847717285
needed time in inference-17:  0.33262109756469727
needed time in inference-18:  0.32912135124206543
needed time in inference-19:  0.33208584785461426
needed time in infer

In [9]:
print("TensorRT improvement compared to the original model:", avg_time_original_model/avg_time_tensorRT_32)
print("TensorRT improvement compared to the original model:", avg_time_original_model/avg_time_tensorRT_16)
print("TensorRT improvement compared to the original model:", avg_time_original_model/avg_time_tensorRT_8)


TensorRT improvement compared to the original model: 1.290296839314097
TensorRT improvement compared to the original model: 2.076927347336751
TensorRT improvement compared to the original model: 2.0398205393759916


In [10]:
sum(out_pred)/64

array([0.44508702], dtype=float32)

In [11]:
sum(out_pred)/64

array([0.44508702], dtype=float32)

In [12]:
sum(out_pred_16)/64

array([0.44433767], dtype=float32)

In [14]:
sum(out_pred8)/64

array([0.444339], dtype=float32)

In [None]:
0.44508702-0.44433767

In [None]:
(0.44508702-0.4450868)

In [17]:
throughput=(1/avg_time_original_model)*64
throughput32=(1/avg_time_tensorRT_32)*64
throughput16=(1/avg_time_tensorRT_16)*64
throughput8=(1/avg_time_tensorRT_8)*64

In [18]:
print(throughput,throughput16, throughput32, throughput8)

186.4647785660624 387.2737979189466 240.5949144271933 380.35468518925023


In [15]:
accu_dif32 = sum(out_pred)/64 - sum(out_pred32)/64
accu_dif16 = sum(out_pred)/64 - sum(out_pred16)/64
accu_dif8 = sum(out_pred)/64 - sum(out_pred8)/64

In [16]:
print(accu_dif32, accu_dif16,accu_dif8)

[2.0861626e-07] [0.00074935] [0.00074801]
