In [1]:
import tensorrt as trt
import uff

## Settings

In [2]:
# Conversion settings. Every value tagged ADJUST is specific to the model being converted.

# Files: the frozen TensorFlow GraphDef to read and the serialized engine to write.
FROZEN_GDEF_PATH = 'data/frozen.pb'  # ADJUST
ENGINE_PATH = 'data/engine.plan'  # ADJUST

# Graph endpoints, addressed by node name.
INPUT_NODE = 'net/input'  # ADJUST
OUTPUT_NODE = 'net/fc8/BiasAdd'  # ADJUST

# Shape registered with the UFF parser for INPUT_NODE.
# NOTE(review): three entries with the 3 first, i.e. presumably channels-first and
# without a batch dimension -- confirm against the layout of the graph's placeholder.
INPUT_SIZE = [3, 224, 224]  # ADJUST

# Limits handed to the TensorRT builder.
MAX_BATCH_SIZE = 1  # ADJUST
MAX_WORKSPACE = 4 * 1024 ** 3  # ADJUST; equals 1 << 32 (4 GiB if the unit is bytes -- confirm)

# Requested precision; the builder's FP16 mode is switched on only for trt.float16.
DATA_TYPE = trt.float16  # ADJUST: trt.float16 | trt.float32

## Convert TF frozen graph to UFF graph

In [3]:
# Convert the frozen TensorFlow graph to a UFF model, requesting OUTPUT_NODE as the
# only output. The result stays in the `uff_model` variable for the parsing step.
uff_model = uff.from_tensorflow_frozen_model(
    FROZEN_GDEF_PATH,
    output_nodes=[OUTPUT_NODE],
)

=== Automatically deduced input nodes ===
[name: "net/input"
op: "Placeholder"
attr {
  key: "dtype"
  value {
    type: DT_FLOAT
  }
}
attr {
  key: "shape"
  value {
    shape {
      dim {
        size: 1
      }
      dim {
        size: 224
      }
      dim {
        size: 224
      }
      dim {
        size: 3
      }
    }
  }
}
]

Using output node net/fc8/BiasAdd
Converting to UFF graph
No. nodes: 108


## Create TRT model builder

In [4]:
# Logger shared with the builder; INFO is the severity passed to trt.Logger.
trt_logger = trt.Logger(trt.Logger.INFO)
builder = trt.Builder(trt_logger)

# Builder options, all taken from the settings cell.
# fp16_mode becomes True only when DATA_TYPE is exactly trt.float16; any other
# value leaves it False.
builder.fp16_mode = DATA_TYPE == trt.float16
builder.max_workspace_size = MAX_WORKSPACE
builder.max_batch_size = MAX_BATCH_SIZE

## Create UFF parser

In [5]:
# Create the UFF parser and declare which graph nodes act as network input and output.
parser = trt.UffParser()

# Both register_* calls report success as a boolean. Previously the result of
# register_input was discarded, so a failed input registration went unnoticed
# until a later step broke. Check both and stop here with a clear message.
input_registered = parser.register_input(INPUT_NODE, INPUT_SIZE)
output_registered = parser.register_output(OUTPUT_NODE)
if not (input_registered and output_registered):
    raise RuntimeError(
        "UFF parser registration failed (input {!r}: {}, output {!r}: {})".format(
            INPUT_NODE, input_registered, OUTPUT_NODE, output_registered
        )
    )

# Keep the registration result as the cell's displayed value.
output_registered

True

## Parse UFF graph

In [6]:
# Create an empty network definition and let the UFF parser populate it from the
# in-memory UFF model.
network = builder.create_network()
parsed_ok = parser.parse_buffer(uff_model, network)

# parse_buffer signals failure through its boolean result rather than an exception.
# Fail here so that "Run All" does not go on to build an engine from an unparsed network.
if not parsed_ok:
    raise RuntimeError(
        "Failed to parse the UFF model into the TensorRT network; "
        "check INPUT_NODE, OUTPUT_NODE and INPUT_SIZE, and see the TensorRT log output."
    )

# Keep the parse result as the cell's displayed value.
parsed_ok

True

## Build optimized inference engine

In [7]:
# Build the optimized inference engine from the parsed network.
engine = builder.build_cuda_engine(network)

# build_cuda_engine returns None when the build fails instead of raising, which would
# otherwise only show up later as an AttributeError on `engine`. Surface it here.
if engine is None:
    raise RuntimeError(
        "TensorRT failed to build the engine; see the TensorRT log output for details."
    )

## Save inference engine

In [8]:
# Serialize the engine BEFORE opening the target file. Opening with "wb" truncates
# ENGINE_PATH immediately, so serializing inside the `with` block would destroy an
# existing plan file (and leave an empty one behind) whenever `engine` is None or
# serialization fails.
if engine is None:
    raise RuntimeError("No engine to save: the engine build did not produce an engine.")
serialized_engine = engine.serialize()

# Write the serialized plan to disk.
with open(ENGINE_PATH, "wb") as f:
    f.write(serialized_engine)