https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#import_tf_python
https://qiita.com/dcm_sakai/items/0e13e2917adf55e92745

In [None]:
import numpy as np
import tensorflow as tf
import tensorrt as trt
%matplotlib inline
import matplotlib.pyplot as plt
import uff
import pycuda.driver as cuda
import pycuda.autoinit

In [None]:
# Convert the frozen TensorFlow graph into a UFF file; the TensorRT cell
# further down loads the model from this same .uff path.
pb_file_in = '../data/output/frozen_model/frozen.pb'
uff_file_out = '../data/output/frozen_model/frozen.uff'
uff.from_tensorflow_frozen_model(
    pb_file_in,
    # Graph node whose tensor is exported as the model output.
    output_nodes=["generator/Tanh"],
    output_filename=uff_file_out,
    # Presumably makes the converter print the graph's nodes - handy for
    # looking up input/output node names (confirm against the uff docs).
    list_nodes=True,
)

In [None]:
# Load one evaluation sample: a flat float32 file that is split into two
# equally sized halves, the network input followed by the target.
fn = "../data/input/evaluation/002080.bin"

raw_data = np.fromfile(fn, dtype=np.float32)
print("raw data shape=", raw_data.shape)

# First axis separates the two halves: index 0 -> input, index 1 -> target.
raw_data_reshaped = raw_data.reshape(2, 1, -1, 1)

# Give each half a leading axis of size 1, i.e. shape (1, 1, N, 1).
# NOTE(review): `input` shadows the builtin but is read by later cells,
# so the name is kept.
input, target = (half.reshape(1, 1, -1, 1) for half in raw_data_reshaped)
print(input.shape)
plt.plot(input[0, 0, :, 0])

In [None]:
# Build a TensorRT engine from the UFF model and run a single inference on
# `input`; the result is left in `h_output` for the comparison plot below.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    # Node names must match those used when the frozen graph was converted.
    parser.register_input("input", (1, 1, 1024))
    parser.register_output("generator/Tanh")
    # parse() signals failure through its return value instead of raising;
    # without this check the error only surfaces later as an obscure crash.
    if not parser.parse("../data/output/frozen_model/frozen.uff", network):
        raise RuntimeError("Failed to parse UFF model: ../data/output/frozen_model/frozen.uff")

    builder.max_workspace_size = 1 << 20
    builder.fp16_mode = True
    builder.strict_type_constraints = True
    # build_cuda_engine() returns None when the build fails; entering a
    # `with` block on None would raise a confusing AttributeError.
    engine = builder.build_cuda_engine(network)
    if engine is None:
        raise RuntimeError("Failed to build the TensorRT engine from the parsed network")
    with engine:
        # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
        h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np.float32)
        h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float32)
        # Allocate device memory for inputs and outputs.
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        # Create a stream in which to copy inputs/outputs and run inference.
        stream = cuda.Stream()

        with engine.create_execution_context() as context:
            # Copy the 1-D sample into the flat page-locked host buffer.
            np.copyto(h_input, input[0, 0, :, 0])
            # Transfer input data to the GPU.
            cuda.memcpy_htod_async(d_input, h_input, stream)
            # Run inference.
            context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            cuda.memcpy_dtoh_async(h_output, d_output, stream)
            # Wait for the queued copies and the inference to finish.
            stream.synchronize()
            # Plot the host output.
            plt.plot(h_output)

In [None]:
# Reference result: feed the same sample through the frozen TensorFlow graph
# so that its output can be compared with the TensorRT output.
tf.reset_default_graph()

# Deserialize the frozen GraphDef from disk.
graph_def = tf.GraphDef()
with tf.gfile.GFile("../data/output/frozen_model/frozen.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Import into a dedicated graph; every node name gets the "prefix/" scope.
graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="prefix")

X = graph.get_tensor_by_name('prefix/input:0')
output_node = graph.get_tensor_by_name('prefix/generator/Tanh:0')
with tf.Session(graph=graph) as sess:
    frozen_output = sess.run(output_node, feed_dict={X: input})

In [None]:
# Overlay the input sample and the TensorFlow frozen-graph output on a shared
# x axis, then add the TensorRT output (plotted against its own index).
x = np.arange(1024)
plt.plot(x, input[0, 0, :, 0], x, frozen_output[0, 0, :, 0], '-')
plt.plot(h_output)