https://colab.research.google.com/gist/ohtaman/c1cf119c463fd94b0da50feea320ba1e/edgetpu-with-keras.ipynb#scrollTo=zI0zfQTL-p5U

In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
print(tf.__version__)

1.14.1-dev20190622


In [2]:
def quantize(detail, data):
    shape = detail['shape']
    dtype = detail['dtype']
    a, b = detail['quantization']
    
    return (data/a + b).astype(dtype).reshape(shape)


def dequantize(detail, data):
    a, b = detail['quantization']
    
    return (data - b)*a

In [64]:
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

train_images = np.expand_dims(train_images / 255.0, -1)
test_images = np.expand_dims(test_images / 255.0, -1)

# train_images = np.expand_dims(train_images, -1)
# test_images = np.expand_dims(test_images, -1)

In [65]:
tf.enable_eager_execution()

In [66]:
def build_keras_model():
    return keras.Sequential([
        keras.layers.Conv2D(16, 3, activation='relu', padding='same', dilation_rate=(2, 1), input_shape=(28, 28, 1)),
        keras.layers.BatchNormalization(fused=False),
        keras.layers.Conv2D(16, 3, activation='relu', dilation_rate=1),
        keras.layers.BatchNormalization(fused=False),
        keras.layers.GlobalAveragePooling2D(data_format='channels_last'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(10, activation='softmax')
    ])

In [67]:
# train
train_graph = tf.Graph()
train_sess = tf.Session(graph=train_graph)

keras.backend.set_session(train_sess)
with train_graph.as_default():
    train_model = build_keras_model()
    tf.contrib.quantize.create_training_graph(input_graph=train_graph, quant_delay=100)
    train_sess.run(tf.global_variables_initializer())    

    train_model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    train_model.fit(train_images, train_labels, epochs=1, validation_data=(test_images, test_labels))
    
    # save graph and checkpoints
    saver = tf.train.Saver()
    saver.save(train_sess, 'checkpoints')

Train on 60000 samples, validate on 10000 samples


In [68]:
keras.backend.set_session(train_sess)
with train_graph.as_default():
    print('sample result of original model')
    a = train_model.predict(test_images[7:8])
print(a)
print(a.argmax(), a[0][a.argmax()])
print(test_labels[4:5])

sample result of original model
[[5.3106723e-03 8.2482910e-04 2.7581239e-02 2.0710243e-02 8.6761676e-02
  1.4784020e-04 8.5853201e-01 4.0725285e-05 4.6997880e-05 4.3749307e-05]]
6 0.858532
[6]


In [69]:
# eval
eval_graph = tf.Graph()
eval_sess = tf.Session(graph=eval_graph)

keras.backend.set_session(eval_sess)
with eval_graph.as_default():
    keras.backend.set_learning_phase(0)
    eval_model = build_keras_model()
    tf.contrib.quantize.create_eval_graph(input_graph=eval_graph)
    eval_graph_def = eval_graph.as_graph_def()
    saver = tf.train.Saver()
    saver.restore(eval_sess, 'checkpoints')

    frozen_graph_def = tf.graph_util.convert_variables_to_constants(
        eval_sess,
        eval_graph_def,
        [eval_model.output.op.name]
    )

    with open('frozen_model.pb', 'wb') as f:
        f.write(frozen_graph_def.SerializeToString())

In [85]:
keras.backend.set_session(eval_sess)
with eval_graph.as_default():
    a = eval_model.predict(test_images[15:16])

print(a)
print(a.argmax(), a[0][a.argmax()])
print(test_labels[4:5])

[[1.2353873e-02 7.3420727e-01 1.4526880e-04 2.5059506e-01 3.1954062e-04
  7.7658217e-07 2.3766896e-03 2.2966522e-07 1.0343795e-06 2.6505822e-07]]
1 0.7342073
[6]


In [71]:
quantized = len([n.name for n in eval_graph_def.node if 'quant' in n.name]) > 0
print('Quantization succeeded:', quantized)

Quantization succeeded: True


### TFLite w/o qunatization

In [76]:
%%bash

tflite_convert \
    --output_file=model.tflite \
    --graph_def_file=frozen_model.pb \
    --inference_type=FLOAT \
    --inference_input_type=FLOAT \
    --input_arrays=conv2d_input \
    --output_arrays=dense_1/Softmax

2019-07-02 16:32:57.950211: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2019-07-02 16:32:57.958756: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: GeForce GTX TITAN X major: 5 minor: 2 memoryClockRate(GHz): 1.076
pciBusID: 0000:02:00.0
2019-07-02 16:32:57.959887: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 1 with properties: 
name: GeForce GTX TITAN X major: 5 minor: 2 memoryClockRate(GHz): 1.076
pciBusID: 0000:03:00.0
2019-07-02 16:32:57.959996: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2019-07-02 16:32:57.960707: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2019-07-02 16:32:57.961391: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.1

In [77]:
# load TFLite file
interpreter = tf.lite.Interpreter(model_path=f'model.tflite')
# Allocate memory. 
interpreter.allocate_tensors()

# get some informations .
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print(input_details)
print(output_details)

[{'name': 'conv2d_input', 'index': 26, 'shape': array([ 1, 28, 28,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}]
[{'name': 'dense_1/Softmax', 'index': 33, 'shape': array([ 1, 10], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}]


In [78]:
input_ = test_images[7:8].astype(np.float32)
interpreter.set_tensor(input_details[0]['index'], input_)
interpreter.invoke()

# The results are stored on 'index' of output_details
output_ = interpreter.get_tensor(output_details[0]['index'])

print(output_)
print(output_[0].argmax(), output_[0][output_.argmax()])
print(test_labels[4:5])

[[2.6056336e-04 8.2783525e-05 1.5631125e-03 6.1572966e-04 8.7285908e-03
  1.9746150e-05 9.8870695e-01 9.7344878e-07 1.4824840e-05 6.7396331e-06]]
6 0.98870695
[6]


### TFLite w/ qunatization

In [80]:
%%bash

tflite_convert \
    --output_file=model_quant.tflite \
    --graph_def_file=frozen_model.pb \
    --inference_type=QUANTIZED_UINT8 \
    --input_arrays=conv2d_input \
    --output_arrays=dense_1/Softmax \
    --mean_values=0 \
    --std_dev_values=255

2019-07-02 16:33:21.559546: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2019-07-02 16:33:21.567928: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: GeForce GTX TITAN X major: 5 minor: 2 memoryClockRate(GHz): 1.076
pciBusID: 0000:02:00.0
2019-07-02 16:33:21.569059: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 1 with properties: 
name: GeForce GTX TITAN X major: 5 minor: 2 memoryClockRate(GHz): 1.076
pciBusID: 0000:03:00.0
2019-07-02 16:33:21.569186: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2019-07-02 16:33:21.569954: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2019-07-02 16:33:21.570648: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.1

In [81]:
# load TFLite file
interpreter = tf.lite.Interpreter(model_path=f'model_quant.tflite')
# Allocate memory. 
interpreter.allocate_tensors()

# get some informations .
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print(input_details)
print(output_details)

[{'name': 'conv2d_input', 'index': 18, 'shape': array([ 1, 28, 28,  1], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (0.003921568859368563, 0)}]
[{'name': 'dense_1/Softmax', 'index': 24, 'shape': array([ 1, 10], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (0.00390625, 0)}]


In [86]:
quantized_input = quantize(input_details[0], test_images[15:16])
interpreter.set_tensor(input_details[0]['index'], quantized_input)
interpreter.invoke()

# The results are stored on 'index' of output_details
quantized_output = interpreter.get_tensor(output_details[0]['index'])

print('sample result of quantized model')
output_ = dequantize(output_details[0], quantized_output)
print(output_)
print(output_.argmax(), output_[0][output_.argmax()])
print(test_labels[7:8])

sample result of quantized model
[[0.         0.984375   0.         0.01171875 0.         0.
  0.00390625 0.         0.         0.        ]]
1 0.984375
[6]
