In [1]:
# Shell escape: list the NVIDIA GPUs on this machine together with the driver and CUDA versions.
!nvidia-smi

Thu Feb  7 00:09:25 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.25       Driver Version: 418.25       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM3...  On   | 00000000:34:00.0 Off |                    0 |
| N/A   33C    P0    53W / 350W |      0MiB / 32480MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM3...  On   | 00000000:36:00.0 Off |                    0 |
| N/A   32C    P0    52W / 350W |      0MiB / 32480MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla V100-SXM3...  On   | 00000000:39:00.0 Off |                    0 |
| N/A   

In [2]:
import os
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorflow as tf; print('TensorFlow Version: {}'.format(tf.__version__))
import tensorrt as trt; print('TensorRT Version: {}'.format(trt.__version__))
import uff

TensorFlow Version: 1.12.0
TensorRT Version: 5.0.2.6


In [3]:
# Root of the dataset tree, plus the model and artifact directories derived from it.
base_dir = os.path.join(
    '/',
    'datasets', 'retail', 'iva',
    'industrial-inspection', 'pg-defect-segmentation',
)
models_dir = os.path.join(base_dir, 'models')
artifacts_dir = os.path.join(base_dir, 'models', 'artifacts')

In [4]:
# Network settings read by the engine-building and engine-writing cells.
model_path = os.path.join(artifacts_dir, 'frozen_graph.uff')
input_names = ['input_1']
output_names = ['conv2d_19_2/Sigmoid']

# Input tensor shape as (channel, height, width).
# Alternative shapes kept for reference:
#   (1, 512, 512), (1, 512, 1136), (1, 1024, 1024),
#   (1, 1024, 2272), (1, 2048, 2048), (1, 4096, 4096)
n_channel = 3
n_height = 512
n_width = 512
dimensions = [n_channel, n_height, n_width]

batch_size = 1
precision = 'fp16'  # options are 'fp16' (default), 'int8', and 'fp32'
# NOTE(review): the nvidia-smi output captured at the top of this notebook lists
# Tesla V100 GPUs, while this label says 't4' -- confirm the label matches the
# device the engine is actually built on.
architecture = 't4'  # options are 't4' (default) and 'xavier'

In [5]:
# Logger handed to the TensorRT builder. Raise the severity (e.g. trt.Logger.WARNING)
# to suppress messages, or lower it to display more of them.
_trt_log_severity = trt.Logger.INFO
TRT_LOGGER = trt.Logger(_trt_log_severity)

In [6]:
def build_engine(filename):
    """Parse a UFF model file and build a TensorRT CUDA engine from it.

    The build is driven by notebook-level settings: ``input_names``,
    ``output_names``, ``dimensions``, ``precision``, ``batch_size`` and
    ``TRT_LOGGER``.

    Args:
        filename: Path to the UFF file to parse.

    Returns:
        The engine returned by ``builder.build_cuda_engine``.

    Raises:
        ValueError: If ``precision`` is not one of 'fp16', 'int8' or 'fp32'.
        RuntimeError: If the UFF file cannot be parsed or the engine build
            returns ``None``.
    """
    supported_precisions = ('fp16', 'int8', 'fp32')
    # Reject typos early: an unrecognised value would otherwise silently build
    # an FP32 engine while the output file name advertises something else.
    if precision not in supported_precisions:
        raise ValueError(
            'Unsupported precision {!r}; expected one of {}'.format(precision, supported_precisions)
        )

    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Set max workspace (1 GiB).
        builder.max_workspace_size = 1 << 30
        # Apply the configured batch size so the engine matches the 'b{batch_size}'
        # tag that the engine file name carries.
        builder.max_batch_size = batch_size
        # Set precision ('fp32' leaves both reduced-precision flags untouched).
        if precision == 'fp16':
            builder.fp16_mode = True
        elif precision == 'int8':
            # NOTE(review): no INT8 calibrator or dynamic ranges are configured here;
            # TensorRT presumably needs one for INT8 builds -- confirm before relying
            # on this path.
            builder.int8_mode = True

        # Parse the UFF network; parse() reports failure through its return value.
        parser.register_input(input_names[0], dimensions)
        parser.register_output(output_names[0])
        if not parser.parse(filename, network):
            raise RuntimeError('Failed to parse UFF model: {}'.format(filename))

        # Build the engine and fail loudly here rather than handing None to the caller.
        engine = builder.build_cuda_engine(network)
        if engine is None:
            raise RuntimeError('TensorRT failed to build an engine from: {}'.format(filename))
        return engine


In [7]:
# Build the engine from the UFF model and echo the resulting object.
engine = build_engine(filename=model_path)
print(engine)

<tensorrt.tensorrt.ICudaEngine object at 0x7fe2d9ce88b8>


In [8]:
# Write the serialized engine to disk; the file name encodes the target
# architecture label, the batch size and the precision.
engine_path = os.path.join(
    artifacts_dir,
    'defect_classifier_{}_b{}_{}.engine'.format(architecture, batch_size, precision),
)
# Serialize before opening the file: if serialization fails (e.g. `engine` is None)
# the 'wb' open would otherwise already have truncated any existing engine file.
serialized_engine = engine.serialize()
with open(engine_path, 'wb') as engine_file:
    print('Writing: {}'.format(engine_path))
    engine_file.write(serialized_engine)

Writing: /datasets/retail/iva/industrial-inspection/pg-defect-segmentation/models/artifacts/defect_classifier_t4_b1_fp16.engine
