# Magma Keras to TensorRT

### Importing

In [1]:
import keras
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import Adam
from keras.models import Sequential,load_model, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
from tensorflow.python.platform import gfile


import time 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

import os
from os.path import isfile, exists, isdir, join

import shutil

Using TensorFlow backend.


### For Jetson TX2

In [2]:
from keras import backend as K

# Let TensorFlow grow its GPU allocation on demand instead of reserving
# memory up front, and make Keras use a session with that option.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
K.set_session(tf.Session(config=config))

# NOTE(review): this sets the learning phase on the `tf.keras` backend, while
# the models in this notebook are built with the standalone `keras` package
# (backend `K`). Confirm this call has the intended effect; the conversion
# cell calls K.set_learning_phase(0) separately before loading the model.
tf.keras.backend.set_learning_phase(0)

In [3]:
# --- Model / training configuration -------------------------------------
CNN_MODEL = 'MagmaCnnClassifier.hdf5'  # file name of the saved Keras model
TARGET_SHAPE = 8                       # units of the softmax output layer
DATA_SHAPE = (100,100,3)               # input_shape of the first Conv2D; [:2] is the image target size
OPTIMIZER = Adam()
BATCH_SIZE = 256
EPOCH = 2

# --- Paths ---------------------------------------------------------------
SAVED_MODEL_DIR = './saved_model/'
MODEL_NAME = CNN_MODEL

# DATA_SHAPE used to be assigned a second time here with the same value;
# it is now defined exactly once above so the two cannot drift apart.
TRAIN_DIR = "./data/train"
TEST_DIR = "./data/test"
RESULT_PREDICTION_CALLBACK = None

# Data preprocessing
### Create predict_dir by moving random pictures out of test_dir
#### remove and copy test_dir

In [4]:
# Working copy of TEST_DIR; the validation generator reads from here.
TEST_COPY_DIR = './data/test_dir'
# Images moved out of TEST_COPY_DIR and later used as the prediction batch.
PREDICT_DIR = './data/predict_dir'

In [None]:
# Start from a clean slate: delete the working folders if they exist.
if isdir(TEST_COPY_DIR):
    shutil.rmtree(TEST_COPY_DIR)
print('remove if exist test_dir success')

if isdir(PREDICT_DIR):
    shutil.rmtree(PREDICT_DIR)
print('remove if exist predict_dir success')

# Copy TEST_DIR to TEST_COPY_DIR.
# shutil.copytree replaces the former `cp -a` subprocess call, whose return
# code was ignored: a failed copy still printed "success". copytree raises
# on failure, preserves file metadata (copy2) and, with symlinks=True, keeps
# symbolic links as links. The destination was removed above, as copytree
# requires.
shutil.copytree(TEST_DIR, TEST_COPY_DIR, symlinks=True)
print('copy test to test_dir success')

#### Create predict_dir and move randomly selected images into it

In [5]:
#random select images
CATEGORIES = ['0','1','2','3','4','5','6','7']
IMAGES_PER_FOLDER = 1

In [None]:
import random

# For every class folder, move IMAGES_PER_FOLDER randomly chosen files from
# the test copy into the matching folder under PREDICT_DIR.
for label in CATEGORIES:
    src_dir = join(TEST_COPY_DIR, label)
    dst_dir = join(PREDICT_DIR, label)

    if not exists(dst_dir):
        os.makedirs(dst_dir)

    candidates = os.listdir(src_dir)
    random.shuffle(candidates)
    picked = candidates[:IMAGES_PER_FOLDER]

    for file_name in picked:
        shutil.move(join(src_dir, file_name), join(dst_dir, file_name))

    print('copy - category:', label, picked)
    
    

# WorkFlow

### Keras to TensorRT
![alt text](pictures/Keras_to_TensorRT.png)

### Tensorflow to TensorRT
![alt text](pictures/tf-trt_workflow.png)

## a0. Convert Keras to Tensorflow model and a) Read input Tensorflow model
##### No need to read an input TensorFlow model here, because no TensorFlow .meta file was saved from Keras
### Build Keras model (Declare + Train)
#### a.1) Declare Model

In [None]:
# CNN classifier: four Conv2D + MaxPooling2D stages followed by a dense head.
# The first and last layers are named explicitly so their graph nodes can be
# looked up by name after conversion.
model = Sequential()

# Layer 1
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=DATA_SHAPE, name='input_tensor'))
model.add(MaxPooling2D((2, 2)))
# Layer 2
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
# Layer 3
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
# Layer 4
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Flatten the feature maps for the fully connected layers
model.add(Flatten())
model.add(Dropout(0.5))
# Fully connected layer
model.add(Dense(512, activation='relu'))
# Output layer
model.add(Dense(TARGET_SHAPE, activation='softmax', name='output_tensor'))

# Compile the network
model.compile(optimizer=OPTIMIZER,
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

model.summary()

#### a.2) create generator

In [None]:
# Training images are rescaled and randomly augmented; validation images are
# only rescaled.
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
)
train_datagen = ImageDataGenerator(rescale=1 / 255, **augmentation)
val_datagen = ImageDataGenerator(rescale=1 / 255)

# Options shared by both directory iterators.
flow_options = dict(
    target_size=DATA_SHAPE[:2],
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(directory=TRAIN_DIR, **flow_options)

# Validation uses the test copy, i.e. the test data minus the images that
# were moved out for prediction.
val_generator = val_datagen.flow_from_directory(directory=TEST_COPY_DIR, **flow_options)

#### a.3) Train model

In [None]:
# One TensorBoard log directory per run, named after the start time.
sub_dir = time.strftime("%Y_%m_%d-%H_%M_%S", time.localtime())
tb_cb = TensorBoard(log_dir='./logs/'+sub_dir,
                    #histogram_freq=EPOCH,
                    batch_size=BATCH_SIZE, 
                    write_grads=True,
                    write_images=True,
                    #update_freq='batch',
                    )
callbacks = [tb_cb]

# train_generator, val_generator declare Data preprocessing

# Integer division yields 0 steps when a set holds fewer than BATCH_SIZE
# images; clamp to at least one step so training/validation still run.
train_steps = max(1, train_generator.n // BATCH_SIZE)
val_steps = max(1, val_generator.n // BATCH_SIZE)

model.fit_generator(generator=train_generator,
                    steps_per_epoch=train_steps,
                    epochs=EPOCH,
                    validation_data=val_generator,
                    validation_steps=val_steps,
                    callbacks=callbacks)

In [None]:
# Names of all nodes currently in the default TensorFlow graph
default_graph_def = tf.get_default_graph().as_graph_def()
[node.name for node in default_graph_def.node]

###### ----------------------------------------example model from medium

In [6]:
# Toy example adapted from
# https://medium.com/@pipidog/how-to-convert-your-keras-models-to-tensorflow-e471400b886a
# 1) build keras model + train
# NOTE(review): this cell rebinds `model`, so cells below operate on this toy
# network rather than the CNN declared above unless a model is reloaded.
positive_samples = np.random.rand(1000,10)
negative_samples = -np.random.rand(1000,10)
x = np.vstack((positive_samples, negative_samples))
y = np.vstack((np.ones((1000,1)), np.zeros((1000,1))))
print(x.shape)
print(y.shape)

model = Sequential([
    Dense(units=32, input_shape=(10,), activation='relu'),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

model.summary()

model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
model.fit(x=x, y=y, epochs=2, validation_split=0.2)

(2000, 10)
(2000, 1)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                352       
_________________________________________________________________
dense_2 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
Total params: 897
Trainable params: 897
Non-trainable params: 0
_________________________________________________________________
Train on 1600 samples, validate on 400 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f2cc37ef0>

### --save model .hdf5

In [None]:
# Nothing else in the notebook creates SAVED_MODEL_DIR, and saving into a
# missing directory fails, so create it first.
if not isdir(SAVED_MODEL_DIR):
    os.makedirs(SAVED_MODEL_DIR)
model.save(SAVED_MODEL_DIR+CNN_MODEL)

### --load model .hdf5

In [None]:
# Reload the saved Keras model; fail loudly if the file is missing.
model_path = SAVED_MODEL_DIR + MODEL_NAME
if not isfile(model_path):
    raise Exception("--MODEL COULD NOT LOADED")

model = load_model(filepath=model_path)
model.summary()

### Keras to TensorRT
![alt text](pictures/Keras_to_TensorRT.png)
## b) Convert to Frozen model .pb
#### b.1) declare function

In [7]:
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.
    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    The names of all global variables are added to the requested outputs
    before conversion. The caller's ``output_names`` list is left untouched.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))
        # Build a new list instead of using `+=`: the in-place form extended
        # the list object passed in by the caller, silently appending every
        # variable name to it.
        output_names = list(output_names or []) + variable_names
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph

#### b.2) Save the model to Protocol Buffers Format (.pb) as tf pb file

In [None]:
# from tensorflow.keras.models import load_model

from keras.models import load_model

# Fix the learning phase to inference before the model graph is rebuilt.
K.set_learning_phase(0)

model = load_model(SAVED_MODEL_DIR + MODEL_NAME)

MODEL_PATH = "./saved_model/test/"
saver = tf.train.Saver()
# Do NOT run tf.global_variables_initializer() here: it would reset every
# variable to its initial value and discard the weights load_model() just
# restored, so the checkpoint would contain an untrained network.
sess = K.get_session()
save_path = saver.save(sess, MODEL_PATH)

In [None]:
# Show which tf.Graph object the model's output tensor belongs to.
print(model.output.graph)

In [8]:
from keras import backend as K
from tensorflow.python.platform import gfile

# Graph node names of the model's output and input tensors.
OUTPUTS = [out.op.name for out in model.outputs]
print(OUTPUTS)
print(OUTPUTS[0])
INPUTS = [inp.op.name for inp in model.inputs]
print(INPUTS)
print(INPUTS[0])

# model is used here 
# K.set_session(tf.Session(graph=model.output.graph)) 
# Do NOT run tf.global_variables_initializer() before freezing: it resets
# all variables to their initial values, so the frozen graph would hold
# untrained weights instead of the trained/loaded ones.

# Pass a copy so OUTPUTS cannot be modified by the callee; OUTPUTS is reused
# later as the output list for the TensorRT conversion.
frozen_graph = freeze_session(K.get_session(), output_names=list(OUTPUTS))

['dense_3/Sigmoid']
dense_3/Sigmoid
['dense_1_input']
dense_1_input
INFO:tensorflow:Froze 34 variables.
INFO:tensorflow:Converted 34 variables to const ops.


In [9]:
# Serialize the frozen GraphDef and store it under a fixed sample file name.
frozen_bytes = frozen_graph.SerializeToString()
with gfile.FastGFile("./saved_model/sampleConvertFrozen.pb", 'wb') as pb_file:
    pb_file.write(frozen_bytes)
print("Frozen model is successfully stored!")

Frozen model is successfully stored!


In [None]:
# Timestamped file name so successive exports do not overwrite each other.
time_now = time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime())
filename = "Magma_frozen_model_" + time_now + ".pb"

# Write the frozen model so it can be reloaded later.
frozen_pb_path = "./saved_model/" + filename
with gfile.FastGFile(frozen_pb_path, 'wb') as pb_file:
    pb_file.write(frozen_graph.SerializeToString())
print("Frozen model is successfully stored!")

In [None]:
# Graph node name of every model input tensor
for input_tensor in model.inputs:
    print(input_tensor.op.name)

In [None]:
# Graph node name of every model output tensor
for output_tensor in model.outputs:
    print(output_tensor.op.name)

#### ---Load a pb file by tensorflow & Inference using the model

In [None]:
##load & inference the model ==================
from tensorflow.python.platform import gfile

with tf.Session() as sess:

    # load model from pb file
    # `filename` is the timestamped name the frozen graph was written under
    # (the previously referenced `pb_filename` was never defined).
    with gfile.FastGFile(SAVED_MODEL_DIR+filename,'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Import into the session's graph; imported nodes get the default
    # "import/" name prefix. (A bare `sess.graph.as_default()` statement has
    # no effect; it must be used as a context manager.)
    with sess.graph.as_default():
        g_in = tf.import_graph_def(graph_def)

    # write to tensorboard (check tensorboard for each op names)
    writer = tf.summary.FileWriter(SAVED_MODEL_DIR+'forzen_log/')
    writer.add_graph(sess.graph)
    writer.flush()
    writer.close()

    # print all operation names 
    print('\n===== ouptut operation names =====\n')
    for op in sess.graph.get_operations():
        print(op.name)

    # inference by the model (op name must comes with :0 to specify the index of its output)
    tensor_output = sess.graph.get_tensor_by_name('import/output_tensor/Softmax:0')
    tensor_input = sess.graph.get_tensor_by_name('import/input_tensor_input:0')
    #TODO change get tensor by name to fit to Magma
    # NOTE(review): `x` is the toy data from the example-model cell, while the
    # tensor names above are those of the CNN; confirm which model's frozen
    # graph is being loaded before running this.
    predictions = sess.run(tensor_output, {tensor_input: x})
    print('\n===== output predicted results =====\n')
    print(predictions)

### Keras to TensorRT
![alt text](pictures/Keras_to_TensorRT.png)
## c) Optimize the frozen model to TensorRT graph

In [None]:
# outputs = ["dense_2/Softmax"]
# outputs = ["output_tensor/Softmax"]

# The prediction cells feed all held-out images (IMAGES_PER_FOLDER per
# category) in a single batch, so the engine must be built for at least that
# many samples. The previous hard-coded value of 2 was smaller than the
# 8-image batch used for inference.
MAX_BATCH_SIZE = len(CATEGORIES) * IMAGES_PER_FOLDER

# convert (optimize) frozen model to TensorRT model
trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,# frozen model
    outputs=OUTPUTS,
    # outputs=outputs,
    max_batch_size=MAX_BATCH_SIZE,# must cover the largest batch fed at inference
    max_workspace_size_bytes=2*(10**9),# specify the max workspace
    precision_mode="FP32") # precision, can be "FP32" (32 floating point precision) or "FP16"

In [None]:
# Timestamped file name for the optimized graph.
time_now = time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime())
tr_filename = "TensorRT_Magma_model_" + time_now + ".pb"

# Write the TensorRT model to be used later for inference.
trt_pb_path = "./saved_model/" + tr_filename
with gfile.FastGFile(trt_pb_path, 'wb') as pb_file:
    pb_file.write(trt_graph.SerializeToString())
print("TensorRT model is successfully stored!")

### Count how many nodes/operations before and after optimization

In [None]:
# Number of nodes in the original frozen graph
all_nodes = len(frozen_graph.node)
print("numb. of all_nodes in frozen graph:", all_nodes)

# Number of nodes in the optimized graph that are TensorRT engine ops
trt_engine_nodes = sum(1 for node in trt_graph.node if str(node.op) == 'TRTEngineOp')
print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)

# Total number of nodes in the optimized graph
all_nodes = len(trt_graph.node)
print("numb. of all_nodes in TensorRT graph:", all_nodes)

## d) Inference using TensorRT model

#### d.1) Function to read ".pb" model (TensorRT model is stored in ".pb")

In [6]:
# (can be used to read frozen model or TensorRT model)
def read_pb_graph(model):
    """Read a serialized graph from a ``.pb`` file.

    @param model Path of the ``.pb`` file to read.
    @return A ``tf.GraphDef`` parsed from the file contents.
    """
    with gfile.FastGFile(model, 'rb') as pb_file:
        serialized = pb_file.read()
        parsed = tf.GraphDef()
        parsed.ParseFromString(serialized)
    return parsed

#### prepare data for prediction

In [7]:
# Build one [file path, class index, file name] entry per image found under
# PREDICT_DIR; the class index is the folder's position in CATEGORIES.
images = []
for class_num, category in enumerate(CATEGORIES):
    path_pd = join(PREDICT_DIR, category)
    images.extend(
        [join(path_pd, img_name), class_num, img_name]
        for img_name in os.listdir(path_pd)
    )

print(images)
print(images[0][0])


[['./data/predict_dir/0/3641.png', 0, '3641.png'], ['./data/predict_dir/1/5852.png', 1, '5852.png'], ['./data/predict_dir/2/5968.png', 2, '5968.png'], ['./data/predict_dir/3/2629.png', 3, '2629.png'], ['./data/predict_dir/4/9928.png', 4, '9928.png'], ['./data/predict_dir/5/9314.png', 5, '9314.png'], ['./data/predict_dir/6/898.png', 6, '898.png'], ['./data/predict_dir/7/2347.png', 7, '2347.png']]
./data/predict_dir/0/3641.png


In [8]:
COLS = 5
rows = len(images)//COLS+1

# Load every prediction image, scale it to [0, 1] and stack all of them into
# one batch array.
input_img = []

for image in images:
    # image is [file path, class index, file name]
    # target_size takes only the two spatial dimensions, matching the
    # DATA_SHAPE[:2] used by the data generators.
    img = load_img(path=image[0], color_mode='rgb', target_size=DATA_SHAPE[:2])
    img2predict = img_to_array(img)
    # same rescaling as the generators (rescale=1/255)
    img2predict /= 255
    input_img.append(img2predict)

input_img = np.array(input_img)
print(input_img.shape)

(8, 100, 100, 3)


#### d.2) Perform inference using TensorRT model

In [None]:
# variable
# print(tr_filename)
# TENSORRT_MODEL_PATH = './saved_model/'+tr_filename

TENSORRT_MODEL_PATH = './saved_model/TensorRT_Magma_model_01-07-2019_16-54-31.pb'

# INPUT_TENSOR = INPUTS + ':0'
# OUTPUT_TENSOR = OUTPUTS + ':0'

graph = tf.Graph()
with graph.as_default():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))) as sess:
        # read TensorRT model
        print('read TensorRT model')
        trt_graph = read_pb_graph(TENSORRT_MODEL_PATH)

        # obtain the corresponding input-output tensor
        print('obtain the corresponding input-output tensor')
        tf.import_graph_def(trt_graph, name='')
        # Named input_tensor/output_tensor so the builtin `input` is not
        # shadowed for the rest of the kernel session.
        input_tensor = sess.graph.get_tensor_by_name('input_tensor_input:0')
        output_tensor = sess.graph.get_tensor_by_name('output_tensor/Softmax:0')

        total_time = 0; n_time_inference = 3
        # The message reports the actual number of timed runs (it used to
        # say 50 regardless of n_time_inference).
        print('in this case, it demonstrates to perform inference for', n_time_inference, 'times')
        print('before Sess run')
        # Untimed warm-up run.
        # NOTE(review): the recorded output stops after "before Sess run",
        # i.e. this call is where the run failed. Check that the batch size of
        # input_img does not exceed the max_batch_size the engine was built
        # with.
        out_pred = sess.run(output_tensor, feed_dict={input_tensor: input_img})
        print('after Sess run')
        for i in range(n_time_inference):
            print('in loop',i)
            t1 = time.time()
            out_pred = sess.run(output_tensor, feed_dict={input_tensor: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_tensorRT = total_time / n_time_inference
        print("average inference time: ", avg_time_tensorRT)

read TensorRT model
obtain the corresponding input-output tensor
in this case, it demonstrates to perform inference for 50 times
before Sess run


#### d.3) Perform inference using the original tensorflow model

In [None]:
# variable
FROZEN_MODEL_PATH = './saved_model/Magma_frozen_model.pb'

graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        # read the frozen (non-TensorRT) model
        frozen_graph = read_pb_graph(FROZEN_MODEL_PATH)

        # obtain the corresponding input-output tensor
        tf.import_graph_def(frozen_graph, name='')
        # Named input_tensor/output_tensor so the builtin `input` is not
        # shadowed for the rest of the kernel session.
        input_tensor = sess.graph.get_tensor_by_name('conv2d_5_input:0')
        output_tensor = sess.graph.get_tensor_by_name('output_tensor_1/Softmax:0')

        # one untimed warm-up run, then n_time_inference timed runs
        total_time = 0; n_time_inference = 50
        out_pred = sess.run(output_tensor, feed_dict={input_tensor: input_img})
        for i in range(n_time_inference):
            t1 = time.time()
            out_pred = sess.run(output_tensor, feed_dict={input_tensor: input_img})
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print("needed time in inference-" + str(i) + ": ", delta_time)
        avg_time_original_model = total_time / n_time_inference
        print("average inference time: ", avg_time_original_model)
        # avg_time_tensorRT comes from the TensorRT inference cell, which must
        # have completed successfully before this line can run.
        print("TensorRT improvement compared to the original model:", avg_time_original_model/avg_time_tensorRT)

#### d.4) Plot the prediction result

In [None]:
# Show every input image next to its predicted class.
# Fixes over the previous version: it referenced an undefined `img1`, passed
# 'rgb' as a colormap name (not a valid matplotlib colormap), and drew every
# prediction onto the same axes so only the last one remained visible.
n_images = len(out_pred)
n_cols = max(1, min(n_images, 4))
n_rows = max(1, -(-n_images // n_cols))  # ceiling division

fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
for ax in axes.ravel():
    ax.axis('off')  # also hides grid cells that stay empty

# input_img holds the images in the same order as the rows of out_pred.
for ax, img_arr, pred in zip(axes.ravel(), input_img, out_pred):
    ax.imshow(img_arr)
    ax.set_title('pred:' + str(np.argmax(pred)), fontsize=22)

plt.tight_layout()
plt.show()

# Visualize the original and optimized graphs
Use [netron](https://lutzroeder.github.io/netron/), a web application that visualizes a model graph from an uploaded .pb file.

## 2. Calculate Time, mAP, ACC of Keras model

## 3. Process in TensorRT

## 4. Calculate Time, mAP, ACC of converted model