# [How to run Keras model inference x3 times faster with CPU and Intel OpenVINO](https://www.dlology.com/blog/how-to-run-keras-model-inference-x3-times-faster-with-cpu-and-intel-openvino-1/) | DLology Blog
Run the `setupvars.bat` before calling `jupyter notebook`.
```
C:\Intel\computer_vision_sdk\bin\setupvars.bat
```
Or in Linux
add the following line to `~/.bashrc`
```
source ~/intel/computer_vision_sdk/bin/setupvars.sh
```

For some steps, the IPython kernel will **restart** before proceeding, to mimic a fresh start.


## Save the Keras model as a single `.h5` file.

In [1]:
# Force use CPU only.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3 as Net
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import preprocess_input, decode_predictions
import numpy as np

# Sample image used to sanity-check the model before it is exported.
img_path = './data/elephant.jpg'
model_path = './model'

# Destination of the exported single-file Keras model.
model_fname = os.path.join(model_path, 'model.h5')

# Create the output directory up front; nothing happens if it already exists.
os.makedirs(model_path, exist_ok=True)

# Side length of the square image fed to the network.
img_height = 224

# Build the network with its ImageNet weights.
model = Net(weights='imagenet')

# Read the sample image at the target size and turn it into a preprocessed
# batch holding a single sample.
img = image.load_img(img_path, target_size=(img_height, img_height))
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

# Classify the sample. decode_predictions yields, for each sample in the
# batch, a list of (class, description, probability) tuples; the top three
# entries of the first sample are printed.
preds = model.predict(x)
print('Predicted:', decode_predictions(preds, top=3)[0])

# Write the model to the h5 path chosen above.
model.save(model_fname)

Predicted: [('n02504458', 'African_elephant', 0.97529536), ('n02504013', 'Indian_elephant', 0.019222798), ('n01871265', 'tusker', 0.0054576383)]


### Benchmark Keras prediction speed.

In [2]:
import time

# Time 20 single-image predictions and report the mean latency and the
# resulting frames per second. perf_counter() is used instead of time.time()
# because it is the clock intended for measuring short durations.
times = []
for i in range(20):
    start_time = time.perf_counter()
    preds = model.predict(x)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1/mean_delta
print('average(sec):{},fps:{}'.format(mean_delta,fps))

# Clear any previous session.
tf.keras.backend.clear_session()

average(sec):0.07710394859313965,fps:12.969504393047067


## Freeze graph

In [3]:
# force reset ipython namespaces
%reset -f

import tensorflow as tf
from tensorflow.python.framework import graph_io
from tensorflow.keras.models import load_model


# Clear any previous session.
tf.keras.backend.clear_session()

# Directory that receives the frozen graph, and the Keras h5 model to load.
save_pb_dir = './model'
model_fname = './model/model.h5'
def freeze_graph(graph, session, output, save_pb_dir='.', save_pb_name='frozen_model.pb', save_pb_as_text=False):
    """Freeze `graph` into constants and write it to disk.

    Args:
        graph: graph to freeze; it is made the default graph for the duration.
        session: session passed to the variables-to-constants conversion.
        output: output node names passed to the conversion.
        save_pb_dir: directory the graph file is written into.
        save_pb_name: file name of the written graph.
        save_pb_as_text: forwarded as `as_text` to the graph writer.

    Returns:
        The frozen graph definition that was written.
    """
    with graph.as_default():
        # Strip training-only nodes first, then turn variables into constants.
        pruned_def = tf.graph_util.remove_training_nodes(graph.as_graph_def())
        frozen_def = tf.graph_util.convert_variables_to_constants(session, pruned_def, output)
        graph_io.write_graph(frozen_def, save_pb_dir, save_pb_name, as_text=save_pb_as_text)
    return frozen_def

# Set the Keras learning phase to 0; this has to happen before the model is
# loaded.
tf.keras.backend.set_learning_phase(0)

model = load_model(model_fname)

# TensorFlow session currently used by the Keras backend.
session = tf.keras.backend.get_session()

# Op names of the model's input and output tensors.
INPUT_NODE = [tensor.op.name for tensor in model.inputs]
OUTPUT_NODE = [tensor.op.name for tensor in model.outputs]
print(INPUT_NODE, OUTPUT_NODE)

# Freeze the session graph from its output nodes and write it to save_pb_dir.
frozen_graph = freeze_graph(session.graph, session, OUTPUT_NODE, save_pb_dir=save_pb_dir)

['input_1'] ['predictions/Softmax']
INFO:tensorflow:Froze 378 variables.
INFO:tensorflow:Converted 378 variables to const ops.


In [4]:
# Print the layer-by-layer summary of the loaded model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, None, None, 3 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, None, None, 3 0           batch_normalization[0][0]        
__________________________________________________________________________________________________
conv2d_1 (

## Model optimization

In [1]:
# force reset ipython namespaces
%reset -f

import platform
is_win = 'windows' in platform.platform().lower()

# Location of the Model Optimizer entry point (mo_tf.py).
#
# OpenVINO 2018 used these paths instead:
#   Windows: C:/Intel/computer_vision_sdk/deployment_tools/model_optimizer/mo_tf.py
#   Linux:   ~/intel/computer_vision_sdk/deployment_tools/model_optimizer/mo_tf.py

# OpenVINO 2019
if is_win:
    # Raw string: the backslashes are path separators, not escape sequences.
    # The embedded double quotes keep the path, which contains spaces, a
    # single argument when it is interpolated into the shell command.
    mo_tf_path = r'"C:\Program Files (x86)\IntelSWTools\openvino\deployment_tools\model_optimizer\mo_tf.py"'
else:
    # mo_tf.py path in Linux
    mo_tf_path = '/opt/intel/openvino/deployment_tools/model_optimizer/mo_tf.py'

# Frozen graph to convert and the directory that receives the result.
pb_file = './model/frozen_model.pb'
output_dir = './model'

# Input shape as [batch, height, width, channels], rendered without spaces so
# it can be passed as one command-line argument.
img_height = 224
input_shape = [1, img_height, img_height, 3]
input_shape_str = str(input_shape).replace(' ', '')
input_shape_str

'[1,224,224,3]'

Use python3.5+

In [2]:
# Run the Model Optimizer script on the frozen graph, passing the output
# directory, the input shape and FP16 as the data type.
!python {mo_tf_path} --input_model {pb_file} --output_dir {output_dir} --input_shape {input_shape_str} --data_type FP16

Model Optimizer arguments:
Common parameters:
	- Path to the Input Model: 	E:\SW_WS\Python_SW\keras_openvino\./model/frozen_model.pb
	- Path for generated IR: 	E:\SW_WS\Python_SW\keras_openvino\./model
	- IR output name: 	frozen_model
	- Log level: 	ERROR
	- Batch: 	Not specified, inherited from the model
	- Input layers: 	Not specified, inherited from the model
	- Output layers: 	Not specified, inherited from the model
	- Input shapes: 	[1,224,224,3]
	- Mean values: 	Not specified
	- Scale values: 	Not specified
	- Scale factor: 	Not specified
	- Precision of IR: 	FP16
	- Enable fusing: 	True
	- Enable grouped convolutions fusing: 	True
	- Move mean values to preprocess section: 	False
	- Reverse input channels: 	False
TensorFlow specific parameters:
	- Input model in text protobuf format: 	False
	- Path to model dump for TensorBoard: 	None
	- List of shared libraries with TensorFlow custom layers implementation: 	None
	- Update the configuration file with input/output node names: 	No

## Inference test with OpenVINO Inference Engine(IE)

Check that a path like `C:\Intel\computer_vision_sdk\python\python3.5` or `~/intel/computer_vision_sdk/python/python3.5` exists in `PYTHONPATH`.

In [3]:
# force reset ipython namespaces
%reset -f

import platform
import os

is_win = 'windows' in platform.platform().lower()

# Hint shown when the OpenVINO environment has not been set up.
#
# OpenVINO 2018 equivalents:
#   Windows: run C:\Intel\computer_vision_sdk\bin\setupvars.bat
#   Linux:   add `source ~/intel/computer_vision_sdk/bin/setupvars.sh` to ~/.bashrc

# OpenVINO 2019.
if is_win:
    # Raw string so the `\b` in `\bin` is not turned into a backspace
    # character, which would corrupt the displayed path.
    message = r'Please run "C:\Program Files (x86)\IntelSWTools\openvino\bin\setupvars.bat" before running this.'
else:
    message = "Add the following line to ~/.bashrc and re-run.\nsource /opt/intel/openvino/bin/setupvars.sh"

# An unset PYTHONPATH is treated as empty, so the assertion reports the hint
# above instead of failing with a KeyError.
python_path = os.environ.get('PYTHONPATH', '')
assert 'computer_vision_sdk' in python_path or 'openvino' in python_path, message


In [4]:
from PIL import Image
import numpy as np
# `sys` is needed by the failure path below; the namespace was reset earlier
# and nothing else imports it.
import sys

# Import the Inference Engine Python API; on failure, report the error type
# and message, then exit.
try:
    from openvino import inference_engine as ie
    from openvino.inference_engine import IENetwork, IEPlugin
except Exception as e:
    exception_type = type(e).__name__
    print("The following error happened while importing Python API module:\n[ {} ] {}".format(exception_type, e))
    sys.exit(1)

In [5]:
def pre_process_image(imagePath, img_height=224):
    """Load an image file and convert it into a single-sample NCHW batch.

    Args:
        imagePath: path of the image file to open.
        img_height: side length of the square the image is resized to.

    Returns:
        Tuple `(image, processedImg, imagePath)`: the PIL image as opened,
        the float array of shape (1, 3, img_height, img_height) scaled to
        the 0-1 range, and the path that was passed in.
    """
    # Model input format
    n, c, h, w = [1, 3, img_height, img_height]
    image = Image.open(imagePath)
    # Force three channels so grayscale, palette or RGBA files still produce
    # an (h, w, 3) array. PIL's resize takes the size as (width, height).
    processedImg = image.convert('RGB').resize((w, h), resample=Image.BILINEAR)

    # Normalize to keep data between 0 - 1
    # NOTE(review): the Keras cell feeds the model through
    # inception_v3.preprocess_input rather than a plain /255 scaling --
    # confirm this normalization is the one intended for the converted model.
    processedImg = np.array(processedImg) / 255.0

    # Change data layout from HWC to CHW, then add the batch dimension.
    processedImg = processedImg.transpose((2, 0, 1))
    processedImg = processedImg.reshape((n, c, h, w))

    return image, processedImg, imagePath

In [7]:
# Plugin initialization for specified device and load extensions library if specified.
plugin_dir = None
# IR files to load: network description (.xml) and weights (.bin).
model_xml = './model/frozen_model.xml'
model_bin = './model/frozen_model.bin'
# Devices: GPU (intel), CPU, MYRIAD
# NOTE(review): the device is set to "GPU" here; use "CPU" to benchmark the
# CPU plugin -- confirm which device the reported numbers refer to.
plugin = IEPlugin("GPU", plugin_dirs=plugin_dir)
# Read IR
net = IENetwork(model=model_xml, weights=model_bin)
# The cells below assume a network with exactly one input and one output.
assert len(net.inputs.keys()) == 1
assert len(net.outputs) == 1
# Names of that single input and single output.
input_blob = next(iter(net.inputs))
out_blob = next(iter(net.outputs))
# Load network to the plugin
exec_net = plugin.load(network=net)
# Only the loaded (executable) network is used from here on.
del net

In [8]:
# Run inference
fileName = './data/elephant.jpg'
image, processedImg, imagePath = pre_process_image(fileName)
res = exec_net.infer(inputs={input_blob: processedImg})
# The result is keyed by output name; take the first (only) output.
output_node_name = list(res.keys())[0]
res = res[output_node_name]
# Index of the highest confidence score for the first sample. argmax finds it
# directly, without sorting every score.
idx = np.argmax(res[0])
idx

386

In [9]:
from tensorflow.keras.applications.inception_v3 import decode_predictions
# Print the top three decoded predictions for the first sample.
print('Predicted:', decode_predictions(res, top=3)[0])

Predicted: [('n02504458', 'African_elephant', 0.92089844), ('n01871265', 'tusker', 0.058898926), ('n02504013', 'Indian_elephant', 0.019760132)]


In [10]:
import time

# Time 20 inference requests and report the mean latency and the resulting
# frames per second. perf_counter() is used instead of time.time() because it
# is the clock intended for measuring short durations.
times = []
for i in range(20):
    start_time = time.perf_counter()
    res = exec_net.infer(inputs={input_blob: processedImg})
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1/mean_delta
print('average(sec):{},fps:{}'.format(mean_delta,fps))

average(sec):0.026259243488311768,fps:38.0818282310802


## Benchmark against TensorFlow, inference speed.

In [8]:
# force reset ipython namespaces
%reset -f

import tensorflow as tf
import os
import sys
from tensorflow.python.platform import gfile
from PIL import Image
import numpy as np

In [2]:
# Frozen TensorFlow graph used for the comparison benchmark.
pb_file = './model/frozen_model.pb'

In [3]:
num_inputs = 1
# Tensor names of the graph inputs as they are looked up after the import
# below (nodes are addressed under the 'import/' prefix).
input_names = [
    'import/input_{}:0'.format(i+1) for i in range(num_inputs)]

# Use CPU only
config = tf.ConfigProto(
    device_count={'GPU': 0}
)
sess = tf.Session(config=config)

# Read the serialized GraphDef. The context manager closes the file even if
# reading or parsing fails; GFile replaces the deprecated FastGFile.
graph_def = tf.GraphDef()
with gfile.GFile(pb_file, 'rb') as f:
    # Parses a serialized binary message into the current message.
    graph_def.ParseFromString(f.read())

# Import the GraphDef into the session's graph. `as_default()` only takes
# effect when entered as a context manager.
with sess.graph.as_default():
    tf.import_graph_def(graph_def)

output_tensor = sess.graph.get_tensor_by_name(
    'import/predictions/Softmax:0')

Instructions for updating:
Use tf.gfile.GFile.


In [4]:
# Input arrays, later paired one-to-one with the graph's input names.
images = []
img_height = 224
fname = './data/elephant.jpg'
# Resize to a square of side img_height with bicubic resampling.
# NOTE(review): pre_process_image in the OpenVINO section resizes with
# BILINEAR -- confirm whether the two benchmarks should share one filter.
im = Image.open(fname).resize((img_height, img_height), Image.BICUBIC)
# Scale pixel values by 1/255.
im = np.array(im) / 255.0
# Prepend a batch axis.
im = im[None, ...]
images.append(im)

In [5]:
# Pair each input tensor name with its image array.
feed_dict = dict(zip(input_names, images))
one_prediction = sess.run(output_tensor, feed_dict)
# Index of the highest score for the first sample.
one_prediction[0].argmax()

386

In [6]:
from tensorflow.keras.applications.inception_v3 import decode_predictions
# Print the top three decoded predictions for the first sample.
print('Predicted:', decode_predictions(one_prediction, top=3)[0])

Predicted: [('n02504458', 'African_elephant', 0.9709825), ('n01871265', 'tusker', 0.020130202), ('n02504013', 'Indian_elephant', 0.008887009)]


In [7]:
import time

# Time 20 session runs and report the mean latency and the resulting frames
# per second. perf_counter() is used instead of time.time() because it is the
# clock intended for measuring short durations.
times = []
for i in range(20):
    start_time = time.perf_counter()
    one_prediction = sess.run(output_tensor, feed_dict)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1/mean_delta
print('average(sec):{},fps:{}'.format(mean_delta,fps))


average(sec):0.06710000038146972,fps:14.903129572502344
