# Magma Keras to TensorRT

### Importing

In [1]:
import keras
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import Adam
from keras.models import Sequential,load_model, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

import tensorflow as tf
import tensorflow.contrib.tensorrt as trt

import time 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

import os
from os.path import isfile, exists, isdir, join

import shutil

Using TensorFlow backend.


### For Jetson TX2

In [2]:
from keras import backend as K

# Ask TensorFlow to grow its GPU allocation on demand instead of reserving
# memory up front, then register that session as the one Keras uses.
# NOTE(review): presumably required because of the Jetson TX2 memory budget - confirm.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.Session(config=config)
K.set_session(session)

In [3]:
# --- Model / training hyper-parameters -------------------------------------
CNN_MODEL = 'MagmaCnnClassifier.hdf5'   # file name of the saved Keras model
TARGET_SHAPE = 8                        # number of units in the softmax output layer
DATA_SHAPE = (100,100,3)                # input shape fed to the first Conv2D layer (defined once)
OPTIMIZER = Adam()
BATCH_SIZE = 256
EPOCH = 2

# --- Paths -------------------------------------------------------------------
SAVED_MODEL_DIR = './saved_model/'
MODEL_NAME = CNN_MODEL
TRAIN_DIR = "./data/train"
TEST_DIR = "./data/test"

RESULT_PREDICTION_CALLBACK = None

# Timestamp used to give every run its own log directory and model file names.
sub_dir = time.strftime("%Y_%m_%d-%H_%M_%S", time.localtime())

# Data preprocessing
### Create predict_dir by moving random pictures out of test_dir  
#### remove and copy test_dir

In [4]:
TEST_COPY_DIR = './data/test_dir'
PREDICT_DIR = './data/predict_dir'

# Start from a clean slate: delete the working folders left by a previous run.
if exists(TEST_COPY_DIR) and isdir(TEST_COPY_DIR):
    shutil.rmtree(TEST_COPY_DIR)
print('remove if exist test_dir success')

if exists(PREDICT_DIR) and isdir(PREDICT_DIR):
    shutil.rmtree(PREDICT_DIR)
print('remove if exist predict_dir success')

# Copy the pristine test set to a working copy that later cells may modify.
# shutil.copytree raises if the copy fails, so the success message below is
# only printed when the copy really happened (the previous `cp` call ignored
# the return code) and the cell no longer depends on a POSIX `cp` binary.
# symlinks=True keeps symbolic links as links, as `cp -a` did.
shutil.copytree(TEST_DIR, TEST_COPY_DIR, symlinks=True)
print('copy test to test_dir success')

remove if exist test_dir success
remove if exist predict_dir success
copy test to test_dir success


#### Create predict_dir and randomly move images into it

In [5]:
#random select images
CATEGORIES = ['0','1','2','3','4','5','6','7']
IMAGES_PER_FOLDER = 3

import random
for category in CATEGORIES:
    
    path_ct = join(TEST_COPY_DIR,category)
    path_pd = join(PREDICT_DIR, category)
    
    if not exists(path_pd):
        os.makedirs(path_pd)
    
    image_list = os.listdir(path_ct)
    random.shuffle(image_list)
    
    for img in image_list[:IMAGES_PER_FOLDER]:
        path_src = join(path_ct,img)
        path_des = join(path_pd,img)
        shutil.move(path_src, path_des)
    
    print('copy - category:',category, image_list[:IMAGES_PER_FOLDER])
    
    

copy - category: 0 ['2506.png', '3641.png', '9631.png']
copy - category: 1 ['7784.png', '2555.png', '1955.png']
copy - category: 2 ['6655.png', '507.png', '1713.png']
copy - category: 3 ['1017.png', '6354.png', '4301.png']
copy - category: 4 ['510.png', '6733.png', '5105.png']
copy - category: 5 ['6708.png', '4340.png', '8349.png']
copy - category: 6 ['1421.png', '4295.png', '1443.png']
copy - category: 7 ['10121.png', '6196.png', '3401.png']


# WorkFlow

### Keras to TensorRT
![alt text](pictures/Keras_to_TensorRT.png)

### Tensorflow to TensorRT
![alt text](pictures/tf-trt_workflow.png)

## a0. Convert Keras to Tensorflow model and a) Read input Tensorflow model
##### Reading an input TensorFlow model is not needed here, because no TensorFlow `.meta` file is saved from Keras
### Build Keras model (Declare + Train)
#### a.1) Declare Model

In [9]:
# CNN classifier: four Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages followed
# by a dropout-regularised dense head. Layers are added in the same order as
# before so Keras assigns the same auto-generated layer names.
model = Sequential()

# Layer 1 (named so the input op can be found in the frozen graph)
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=DATA_SHAPE, name='input_tensor'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Layers 2-4
for n_filters in (64, 128, 128):
    model.add(Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# flatten the feature maps for the fully connected layers
model.add(Flatten())
model.add(Dropout(rate=0.5))
# fully connected layer
model.add(Dense(units=512, activation='relu'))
# output layer (named so the output op can be found in the frozen graph)
model.add(Dense(units=TARGET_SHAPE, activation='softmax', name='output_tensor'))

# Compile the network
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['categorical_accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_tensor (Conv2D)        (None, 98, 98, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 49, 49, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 47, 47, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 23, 23, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 21, 21, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 8, 128)         147584    
__________

#### a.2) create generator

In [10]:
# Training images are rescaled to [0, 1] and randomly augmented;
# validation images are only rescaled.
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
)
train_datagen = ImageDataGenerator(rescale=1 / 255, **augmentation)
val_datagen = ImageDataGenerator(rescale=1 / 255)

# Options shared by both directory iterators.
flow_options = dict(
    target_size=DATA_SHAPE[:2],
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(directory=TRAIN_DIR, **flow_options)

# Validation uses the working test copy, i.e. the test data left after the
# prediction images were split off.
val_generator = val_datagen.flow_from_directory(directory=TEST_COPY_DIR, **flow_options)

Found 4917 images belonging to 8 classes.
Found 1206 images belonging to 8 classes.


#### a.3) Train model

In [11]:
# Log training to a TensorBoard run directory named after this run's timestamp.
tb_cb = TensorBoard(log_dir='./logs/'+sub_dir,
                    batch_size=BATCH_SIZE,
                    write_grads=True,
                    write_images=True)
callbacks = [tb_cb]

# train_generator and val_generator are created in the generator cell (a.2).
# len(generator) is ceil(n / batch_size): unlike n // BATCH_SIZE it also
# covers the last partial batch, so every image is used for training and for
# validation, and it can never be 0 when there are fewer than BATCH_SIZE images.
model.fit_generator(generator=train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=EPOCH,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    callbacks=callbacks)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f4008bc18>

### --save model .hdf5

In [12]:
# model.save does not create the target directory, so make sure it exists
# before writing the .hdf5 file.
os.makedirs(SAVED_MODEL_DIR, exist_ok=True)
model.save(SAVED_MODEL_DIR+CNN_MODEL)

### --load model .hdf5

In [6]:
# Drop the current Keras graph and session so the saved model is reloaded
# into a clean graph.
K.clear_session()
# clear_session() also discards the session registered with K.set_session,
# so register a new one with the same `config` (allow_growth) instead of
# letting Keras create a default session without it.
K.set_session(tf.Session(config=config))

In [7]:
# Reload the trained Keras model from disk; fail early with the offending
# path if the file is missing.
model_path = SAVED_MODEL_DIR+MODEL_NAME
if not isfile(model_path):
    # FileNotFoundError is a subclass of Exception, so existing handlers still work.
    raise FileNotFoundError("--MODEL COULD NOT BE LOADED: " + model_path)

model = load_model(filepath=model_path)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_tensor (Conv2D)        (None, 98, 98, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 49, 49, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 47, 47, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 23, 23, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 21, 21, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 8, 128)         147584    
__________

### Keras to TensorRT
![alt text](pictures/Keras_to_TensorRT.png)
## b) Convert to Frozen model .pb
#### b.1) declare function

In [8]:
# Default file name for the frozen graph; the cell that writes the file
# replaces it with a timestamped name.
pb_filename = 'Magma_frozen_model.pb'

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.
    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.
    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs (any iterable of
                        names, or None). The caller's object is not modified.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        # Read the variable op names once; they are needed twice below.
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))
        # Build a new list instead of using `+=`, which extended the list
        # passed in by the caller as a side effect.
        # The variable ops are appended to the requested outputs, as before.
        output_names = list(output_names or []) + variable_names
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph

#### b.2) Save the model to Protocol Buffers Format (.pb) as tf pb file

In [9]:
from keras import backend as K
from tensorflow.python.platform import gfile

# Freeze the graph of the session Keras is currently using, keeping the
# loaded model's output ops as the requested outputs.
keras_session = K.get_session()
output_op_names = [tensor.op.name for tensor in model.outputs]
frozen_graph = freeze_session(keras_session, output_names=output_op_names)

INFO:tensorflow:Froze 53 variables.
INFO:tensorflow:Converted 53 variables to const ops.


In [10]:
#write the TensorRT model to be used later for inference
# Write the frozen TensorFlow graph (not yet TensorRT-optimized) to a
# timestamped .pb file so it can be reloaded or inspected later.
pb_filename = "Magma_frozen_model-"+sub_dir+".pb"

# gfile.GFile replaces the deprecated gfile.FastGFile.
with gfile.GFile(SAVED_MODEL_DIR + pb_filename, 'wb') as f:
    f.write(frozen_graph.SerializeToString())
print(pb_filename + " is successfully stored!")

Magma_frozen_model-2019_07_08-15_30_13.pb is successfully stored!


In [11]:
# Show the op name of every model input, one per line.
input_op_names = [tensor.op.name for tensor in model.inputs]
for op_name in input_op_names:
    print(op_name)

input_tensor_input


In [12]:
# Show the op name of every model output, one per line.
output_op_names = [tensor.op.name for tensor in model.outputs]
for op_name in output_op_names:
    print(op_name)

output_tensor/Softmax


## Load a PB File by Tensorflow

In [23]:
from tensorflow.python.platform import gfile

# Import the frozen graph into its own tf.Graph. A bare tf.Session() binds to
# the current default graph, and calling `graph.as_default()` without `with`
# has no effect, so the nodes used to be imported on top of the default graph
# and that whole graph ended up in the TensorBoard log.
frozen_viz_graph = tf.Graph()
with tf.Session(graph=frozen_viz_graph) as sess:
    # gfile.GFile replaces the deprecated gfile.FastGFile.
    with gfile.GFile(name=SAVED_MODEL_DIR + pb_filename,mode='rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with sess.graph.as_default():
        g_in = tf.import_graph_def(graph_def)

    # Write the imported graph so it can be viewed in TensorBoard.
    writer = tf.summary.FileWriter('./saved_model/Magma_frozen_log/'+sub_dir)
    writer.add_graph(sess.graph)
    writer.flush()
    writer.close()

### Keras to TensorRT
![alt text](pictures/Keras_to_TensorRT.png)
## c) Optimize the frozen model to TensorRT graph

In [18]:
# Names of the output ops, passed to the converter as the graph outputs.
outputs = [tensor.op.name for tensor in model.outputs]

# Convert (optimize) the frozen model into a TensorRT graph.
trt_options = dict(
    input_graph_def=frozen_graph,         # frozen model
    outputs=outputs,
    max_batch_size=BATCH_SIZE,            # max batch size
    max_workspace_size_bytes=2*(10**9),   # max workspace
    precision_mode="FP32",                # can be "FP32" (32-bit floating point) or "FP16"
)
trt_graph = trt.create_inference_graph(**trt_options)

In [14]:
#write the TensorRT model to be used later for inference
# Write the TensorRT-optimized graph to a timestamped .pb file for later inference.
pb_trt_filename = "TensorRT_Magma_model-"+sub_dir+".pb"

# gfile.GFile replaces the deprecated gfile.FastGFile.
with gfile.GFile(SAVED_MODEL_DIR + pb_trt_filename, 'wb') as f:
    f.write(trt_graph.SerializeToString())
print(pb_trt_filename + " is successfully stored!")

TensorRT_Magma_model-2019_07_08-15_30_13.pb is successfully stored!


## Load a TensorRT PB File by Tensorflow

In [16]:
from tensorflow.python.platform import gfile

# Import the TensorRT graph into its own tf.Graph. A bare tf.Session() binds
# to the current default graph, and calling `graph.as_default()` without
# `with` has no effect, so the nodes used to be imported on top of the default
# graph and that whole graph ended up in the TensorBoard log.
trt_viz_graph = tf.Graph()
with tf.Session(graph=trt_viz_graph) as sess:
    # gfile.GFile replaces the deprecated gfile.FastGFile.
    with gfile.GFile(name=SAVED_MODEL_DIR+pb_trt_filename, mode='rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with sess.graph.as_default():
        g_in = tf.import_graph_def(graph_def)

    # Write the imported graph so it can be viewed in TensorBoard.
    writer = tf.summary.FileWriter('./saved_model/TensorRT_frozen_log/'+sub_dir)
    writer.add_graph(sess.graph)
    writer.flush()
    writer.close()

### c.2) Count how many nodes/operations before and after optimization

In [17]:
# check how many ops of the original frozen model
# Number of nodes in the original frozen model.
frozen_node_count = len(frozen_graph.node)
print("numb. of all_nodes in frozen graph:", frozen_node_count)

# Number of nodes in the optimized graph whose op is 'TRTEngineOp'.
trt_engine_nodes = sum(1 for node in trt_graph.node if str(node.op) == 'TRTEngineOp')
print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)

# Total number of nodes in the optimized graph.
all_nodes = len(trt_graph.node)
print("numb. of all_nodes in TensorRT graph:", all_nodes)

numb. of all_nodes in frozen graph: 120
numb. of trt_engine_nodes in TensorRT graph: 3
numb. of all_nodes in TensorRT graph: 29


### c.3) Visualize the original and optimized graphs
Use [netron](https://lutzroeder.github.io/netron/), a web application that visualizes a model graph from an uploaded .pb file.