In [1]:
from dataget import data
import tfinterface as ti
import sonnet as snt
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cytoolz as cz
from dicto import dicto
import tensorrt as trt
import uff
from tensorrt.parsers import uffparser

MAX_WORKSPACE = 1 << 20 # ADJUST

In [2]:
class AllCNNN(snt.AbstractModule):
    
    def __init__(self, *args, **kwargs):
        kwargs["name"] = kwargs.get("name", "AllCNN")
        super(AllCNNN, self).__init__(*args, **kwargs)
    
    def _build(self, inputs):
        
        print("##########################")
        print("## AllCNN")
        print("##########################")
        
        net = inputs["image"]; print(net)
        training = inputs["mode"] == tf.estimator.ModeKeys.TRAIN
        
        net = ti.layers.conv2d_batch_norm(net, 16, [4, 4], strides = 2, activation = tf.nn.relu, 
                                          padding = "same", batch_norm = dict(training = training)); print(net)
        
        
        net = ti.layers.conv2d_batch_norm(net, 32, [4, 4], strides = 2, activation = tf.nn.relu, 
                                          padding = "same", batch_norm = dict(training = training)); print(net)
        
        
        net = ti.layers.conv2d_batch_norm(net, 64, [3, 3], strides = 1, activation = tf.nn.relu, 
                                          padding = "valid", batch_norm = dict(training = training)); print(net)
        
        
        net = tf.layers.conv2d(net, 10, [3, 3], strides = 1, activation = tf.nn.relu,
                                          padding = "valid"); print(net)
        
        
        
        # global average pooling
        logits = net = tf.reduce_mean(net, axis = [1, 2]); print(net)
        
        # predictions
        predictions = net = tf.nn.softmax(logits); print(net)
    
        print("")
        
        return logits, predictions

In [8]:
params = dicto.load_("parameters.yml")

graph = tf.Graph()

with graph.as_default(), tf.Session(graph = graph) as sess:
    
    inputs = dict(
        image = tf.layers.Input(shape=(28, 28, 1)),
        mode = tf.estimator.ModeKeys.TRAIN,
    )

    all_cnn = AllCNNN()
    logits, predictions = all_cnn(inputs)
    
    graph_def = graph.as_graph_def()
    saver = tf.train.Saver()
    
    snapshot_fpath = tf.train.latest_checkpoint(params.model_dir)
    saver.restore(sess, snapshot_fpath)
    
    # freeze graph and remove nodes used for training 
    frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, params.OUTPUT_NAMES)
    frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
    # Create UFF model and dump it on disk 
    uff_model = uff.from_tensorflow(frozen_graph, params.OUTPUT_NAMES)
    dump = open('all_cnn.uff', 'wb')
    dump.write(uff_model)
    dump.close()

##########################
## AllCNN
##########################
Tensor("input_layer_1:0", shape=(?, 28, 28, 1), dtype=float32)
Tensor("AllCNN/Conv2dBatchNorm/Relu:0", shape=(?, 14, 14, 16), dtype=float32)
Tensor("AllCNN/Conv2dBatchNorm_1/Relu:0", shape=(?, 7, 7, 32), dtype=float32)
Tensor("AllCNN/Conv2dBatchNorm_2/Relu:0", shape=(?, 5, 5, 64), dtype=float32)
Tensor("AllCNN/conv2d/Relu:0", shape=(?, 3, 3, 10), dtype=float32)
Tensor("AllCNN/Mean:0", shape=(?, 10), dtype=float32)
Tensor("AllCNN/Softmax:0", shape=(?, 10), dtype=float32)

INFO:tensorflow:Restoring parameters from models/all_cnn2/model.ckpt-503
INFO:tensorflow:Froze 14 variables.
Converted 14 variables to const ops.
Using output node AllCNN/Softmax
Converting to UFF graph
No. nodes: 39


In [12]:
import pycuda.driver as cuda
import pycuda.autoinit
import argparse

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# This is a helper function, provided by TensorRT devs, to run inference
def infer(context, input_img, batch_size):
    
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    
    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    
    # convert input data to Float32
    input_img = input_img.astype(np.float32)
    
    # Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    
    # alocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    
    # transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    
    # return predictions
    return output

# load model
uff_model = open('all_cnn.uff', 'rb').read()

# create model parser
parser = uffparser.create_uff_parser()

for input in params.INPUTS:
    parser.register_input(input.name, input.size, 0)

for output in params.OUTPUT_NAMES:
    parser.register_output(output)
    
# create inference engine and context (aka session)
trt_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
engine = trt.utils.uff_to_trt_engine(logger=trt_logger,
                                     stream=uff_model,
                                     parser=parser,
                                     max_batch_size=2, # 1 sample at a time
                                     max_workspace_size= 1 << 30, # 1 GB GPU memory workspace
                                     datatype=trt.infer.DataType.FLOAT) # that's very cool, you can set precision
context = engine.create_execution_context()

  File "/home/cristian/anaconda2/lib/python2.7/site-packages/tensorrt/utils/_utils.py", line 186, in uff_to_trt_engine
    assert(parser_result)


AssertionError: UFF parsing failed on line 186 in statement assert(parser_result)