https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#import_tf_python
https://qiita.com/dcm_sakai/items/0e13e2917adf55e92745

In [None]:
import numpy as np
import tensorflow as tf
import tensorrt as trt
%matplotlib inline
import matplotlib.pyplot as plt
import uff
import pycuda.driver as cuda
import pycuda.autoinit
import os
import struct

In [None]:
# Run the local script `sharingan_export.sh` through IPython's `!` shell escape.
# NOTE(review): presumably this produces the frozen model that the next cell converts — confirm.
!./sharingan_export.sh

In [None]:
# Convert the frozen TensorFlow graph to UFF, keeping "generator/Tanh" as the
# only output node, and write the result next to the source .pb file.
pb_file_in = '../data/output/frozen_model/frozen.pb'
uff_file_out = '../data/output/frozen_model/frozen.uff'
uff.from_tensorflow_frozen_model(
    frozen_file=pb_file_in,
    output_nodes=["generator/Tanh"],
    output_filename=uff_file_out,
    list_nodes=False,
)

In [None]:
# Load one sample file as a flat float32 array and plot its first signal.
fn = "../data/input/temp/000599.bin"

raw_data = np.fromfile(fn, dtype=np.float32)
print("raw data shape=", raw_data.shape)

# The flat array is split into two equally long halves, each laid out as
# (1, 1, length, 1): the first half is bound to `input`, the second to `target`.
raw_data_reshaped = raw_data.reshape(2, 1, -1, 1)

input, target = (raw_data_reshaped[half].reshape(1, 1, -1, 1) for half in range(2))
print(input.shape)
plt.plot(input[0, 0, :, 0])

In [None]:
class SharinganCalibrator(trt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds TensorRT one sample file per batch.

    Every regular file directly inside ``batch_data_dir`` is treated as one
    calibration batch. A file is read as a flat float32 array holding two
    equally long signals back to back; only the first signal is used as
    calibration input.

    Args:
        batch_data_dir: Directory containing the calibration files.
        cache_file: Path of the calibration cache that is read and written.

    Raises:
        ValueError: If ``batch_data_dir`` contains no regular files, or if the
            first file cannot be split into two signals.
    """

    def __init__(self, batch_data_dir, cache_file):
        print("in. Calibrator init")
        # Whenever you specify a custom constructor for a TensorRT class,
        # you MUST call the constructor of the parent explicitly.
        trt.IInt8EntropyCalibrator2.__init__(self)

        self.cache_file = cache_file

        # os.listdir() returns entries in arbitrary order, so sort them to make
        # the calibration order reproducible. Sub-directories are skipped
        # because they cannot be read as batch files.
        candidates = (
            os.path.join(batch_data_dir, entry)
            for entry in sorted(os.listdir(batch_data_dir))
        )
        self.batch_files = [path for path in candidates if os.path.isfile(path)]
        if not self.batch_files:
            raise ValueError(
                "no calibration files found in {!r}".format(batch_data_dir)
            )

        # The first file defines the batch shape and the device buffer size.
        self.shape, first_batch = self.read_batch_file(self.batch_files[0])
        self._batch_nbytes = len(first_batch)
        self.device_input = cuda.mem_alloc(self._batch_nbytes)

        # Lazily yields (shape, data) per file; consumed with next() in get_batch().
        self.batches = (self.read_batch_file(path) for path in self.batch_files)
        print("out. Calibrator init")

    def read_batch_file(self, filename):
        """Load one calibration batch from ``filename``.

        In this implementation one file corresponds to one batch.

        Args:
            filename: Path of a file containing raw float32 values.

        Returns:
            A ``(shape, data)`` tuple where ``shape`` is ``(1, 1, length)`` and
            ``data`` holds the first half of the file as raw float32 bytes.

        Raises:
            ValueError: If the file is empty or holds an odd number of values.
        """
        raw_data = np.fromfile(filename, dtype=np.float32)
        if raw_data.size == 0 or raw_data.size % 2:
            raise ValueError(
                "{!r} holds {} float32 values; expected a non-zero, even count".format(
                    filename, raw_data.size
                )
            )
        # Row 0 is the calibration signal, row 1 is the second signal (unused here).
        signal = raw_data.reshape(2, -1)[0]
        return (1, 1, signal.size), signal.tobytes()

    def get_batch_size(self):
        """Return the batch size, i.e. the leading dimension of the batch shape."""
        return self.shape[0]

    def get_batch(self, names):
        """Copy the next batch to the device and return its device pointer.

        TensorRT passes along the names of the engine bindings. They are not
        used here; the returned list is expected to follow the order of
        ``names``.

        Args:
            names: Names of the engine input bindings.

        Returns:
            A one-element list with the device address of the input buffer, or
            ``None`` once every batch file has been consumed, which signals to
            TensorRT that there is no calibration data remaining.

        Raises:
            ValueError: If a batch does not have the size of the first batch,
                since copying it would not match the allocated device buffer.
        """
        try:
            _, data = next(self.batches)
        except StopIteration:
            return None

        if len(data) != self._batch_nbytes:
            raise ValueError(
                "calibration batch has {} bytes; expected {}".format(
                    len(data), self._batch_nbytes
                )
            )
        cuda.memcpy_htod(self.device_input, data)
        return [int(self.device_input)]

    def read_calibration_cache(self):
        """Return the cached calibration data, or ``None`` if there is no cache file."""
        # If there is a cache, use it instead of calibrating again.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        """Write the calibration data ``cache`` to the cache file."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)


In [None]:
# Build an INT8 TensorRT engine from the UFF model, calibrating it with the
# sample files, then run one inference on `input` and plot the result.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:

    batch_data_dir = "../data/input/temp"
    calibration_cache = "sharingan_calibration.cache"
    calib = SharinganCalibrator(batch_data_dir, cache_file=calibration_cache)

    parser.register_input("input", (1, 1, 1024))
    parser.register_output("generator/Tanh")
    # parse() reports failure through its return value; stop here instead of
    # building an engine from an incomplete network.
    if not parser.parse("../data/output/frozen_model/frozen.uff", network):
        raise RuntimeError("failed to parse ../data/output/frozen_model/frozen.uff")

    builder.max_batch_size = calib.get_batch_size()
    builder.int8_mode = True
    builder.int8_calibrator = calib

    # Pin the dynamic range of the network input and output to [-1, 1].
    input_tensor = network.get_input(0)
    input_tensor.set_dynamic_range(-1.0, 1.0)
    output_tensor = network.get_output(0)
    output_tensor.set_dynamic_range(-1.0, 1.0)

    # build_cuda_engine() returns None when the build fails; check it before
    # entering the `with` block so the error message names the real problem.
    engine = builder.build_cuda_engine(network)
    if engine is None:
        raise RuntimeError("TensorRT failed to build the CUDA engine")

    with engine:
        # Show the NumPy dtype of every engine binding.
        for binding in engine:
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            print(dtype)
        # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
        h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np.float32)
        h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float32)
        # Allocate device memory for inputs and outputs.
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        # Create a stream in which to copy inputs/outputs and run inference.
        stream = cuda.Stream()

        with engine.create_execution_context() as context:
            np.copyto(h_input, input[0, 0, :, 0])
            # Transfer input data to the GPU.
            cuda.memcpy_htod_async(d_input, h_input, stream)
            # Run inference.
            context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            cuda.memcpy_dtoh_async(h_output, d_output, stream)
            # Wait for the copies and the inference to finish before reading h_output.
            stream.synchronize()
            # Plot the host copy of the engine output.
            plt.plot(h_output)

In [None]:
# Print the NumPy dtype of every binding of `engine`.
# NOTE(review): `engine` is bound by a `with` statement in the previous cell, and that
# block has already exited when this cell runs — confirm the engine is still valid here.
for binding in engine:
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    print(dtype)

In [None]:
# Reference run: evaluate the frozen TensorFlow graph on `input` so its output
# can be compared with the TensorRT result.
tf.reset_default_graph()

# Deserialize the frozen GraphDef from disk.
graph_def = tf.GraphDef()
with tf.gfile.GFile("../data/output/frozen_model/frozen.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Import it into a fresh graph; every node name gets the "prefix/" scope.
graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="prefix")

X = graph.get_tensor_by_name('prefix/input:0')
output_node = graph.get_tensor_by_name('prefix/generator/Tanh:0')
with tf.Session(graph=graph) as sess:
    frozen_output = sess.run(output_node, feed_dict={X: input})

In [None]:
# Overlay the input signal, the TensorFlow output and the TensorRT output.
# The x axis follows the length of the plotted signal instead of a fixed 1024,
# so the plot keeps working when the sample length changes.
x = np.arange(input.shape[2])
plt.plot(x, input[0, 0, :, 0])
plt.plot(x, frozen_output[0, 0, :, 0], '-')
plt.plot(h_output)