In [0]:
from tensorflow.keras import backend as K
# This line must be executed before loading Keras model.
K.set_learning_phase(0)

In [32]:
from tensorflow.keras.models import load_model
model = load_model('/content/Detecting-Pneumonia.h5')
print(model.outputs)
# [<tf.Tensor 'dense_2/Softmax:0' shape=(?, 10) dtype=float32>]
print(model.inputs)

[<tf.Tensor 'dense_2/Identity:0' shape=(None, 2) dtype=float32>]
[<tf.Tensor 'input_1_2:0' shape=(None, 224, 224, 3) dtype=float32>]


In [0]:
import tensorflow as tf
from tensorflow.keras import backend as K


def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.
    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        # Graph -> GraphDef ProtoBuf
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        return frozen_graph


frozen_graph = freeze_session(K.get_session(), output_names=[out.op.name for out in model.outputs])

In [0]:
tf.train.write_graph(frozen_graph, "model", "tf_model.pb", as_text=False)

'model/tf_model.pb'

In [0]:
import tensorflow as tf
from tensorflow.python.platform import gfile
sess = tf.Session()
f = gfile.FastGFile("./model/tf_model.pb", 'rb')
graph_def = tf.GraphDef()
# Parses a serialized binary message into the current message.
graph_def.ParseFromString(f.read())
f.close()

sess.graph.as_default()
# Import a serialized TensorFlow `GraphDef` protocol buffer
# and place into the current default `Graph`.
tf.import_graph_def(graph_def)

In [0]:
from tensorflow.python.compiler.tensorrt import trt_convert as trt
converter = trt.TrtGraphConverter(
	input_graph_def=frozen_graph,
	nodes_blacklist=['dense_1/Softmax:0'])
frozen_graph = converter.convert()

INFO:tensorflow:Linked TensorRT version: (0, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)
INFO:tensorflow:Running against TensorRT version 0.0.0


In [0]:
tf.train.write_graph(frozen_graph,"inception","tftrt_model.pb",as_text=False)

'inception/tftrt_model.pb'

In [0]:
all_nodes=len([1 for n in frozen_graph.node])
print("no. of all_nodes in frozen graph: ",all_nodes)
trt_engine_nodes = len([1 for n in graph_def.node if str(n.op)=='TRTeng'])
print("no. of trt_engine_nodes in tensorrt graph: ",trt_engine_nodes)
all_nodes=len([1 for n in graph_def.node])
print("no. of all_nodes in tensorrt graph: ",all_nodes)

no. of all_nodes in frozen graph:  517
no. of trt_engine_nodes in tensorrt graph:  0
no. of all_nodes in tensorrt graph:  1726


In [0]:
from tensorflow.keras.preprocessing import image
import numpy as np
from tensorflow.keras.applications.resnet_v2 import preprocess_input

In [0]:
import os
files = os.listdir('chest_xray/test/NORMAL/')
img1 = image.load_img(r'chest_xray/test/NORMAL/' + files[0], target_size=(224, 224))
img_array1 = image.img_to_array(img1)
img_array_expanded_dims1 = np.expand_dims(img_array1, axis=0)
input_img = preprocess_input(img_array_expanded_dims1)

for i in files[1:64]:
    img2 = image.load_img('chest_xray/test/NORMAL/' + i, target_size=(224, 224))
    img_array2 = image.img_to_array(img2)
    img_array_expanded_dims2 = np.expand_dims(img_array2, axis=0)
    img2 = preprocess_input(img_array_expanded_dims2)

    input_img = np.concatenate((input_img, img2),axis=0)

In [0]:
# function to read a ".pb" model 
# (can be used to read frozen model or TensorRT model)
def read_pb_graph(model):
  with gfile.FastGFile(model,'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
  return graph_def

In [0]:
# variable
TENSORRT_MODEL_PATH = '/content/model/tf_model.pb'
import time
graph = tf.Graph()
with graph.as_default():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))) as sess:
        # read TensorRT model
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(trt_graph, name='')
        input = sess.graph.get_tensor_by_name('input_1_1:0')
        output = sess.graph.get_tensor_by_name('dense_1/Softmax:0')

        # in this case, it demonstrates to perform inference for 50 times
        total_time = 0; n_time_inference = 50
        out_pred = sess.run(output, feed_dict={input:input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output, feed_dict={input:input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_tensorRT = total_time / n_time_inference
        print("average inference time: ", avg_time_tensorRT)

needed time in inference-0:  0.15371346473693848
needed time in inference-1:  0.1522524356842041
needed time in inference-2:  0.15550637245178223
needed time in inference-3:  0.15519475936889648
needed time in inference-4:  0.154343843460083
needed time in inference-5:  0.15735530853271484
needed time in inference-6:  0.15699219703674316
needed time in inference-7:  0.15268540382385254
needed time in inference-8:  0.15582847595214844
needed time in inference-9:  0.1558988094329834
needed time in inference-10:  0.15273475646972656
needed time in inference-11:  0.15705513954162598
needed time in inference-12:  0.15645337104797363
needed time in inference-13:  0.15340852737426758
needed time in inference-14:  0.15452790260314941
needed time in inference-15:  0.15575480461120605
needed time in inference-16:  0.15274858474731445
needed time in inference-17:  0.15604901313781738
needed time in inference-18:  0.15709829330444336
needed time in inference-19:  0.15480709075927734
needed time in