In [19]:
#GETTING THE CIFAR DATASET READY FOR EXPERIMENTATION
import tensorflow_datasets as tfds
import tensorflow as     tf
import math
import numpy             as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import *
import pathlib

# Show which TensorFlow release this notebook is running against.
print(tf.__version__)

2.1.0


In [2]:
from utils.cifar_functions import *

# Build the CIFAR-10 input pipelines.
# Fetch the train and test splits. `info` is assigned by both calls, so it ends
# up holding the metadata returned alongside the test split.
# NOTE(review): pre_processing_* and the TRAINING_* constants are not defined in
# this notebook; presumably they come from utils.cifar_functions -- confirm.
dataset_train, info = tfds.load("cifar10", split=tfds.Split.TRAIN, with_info=True)
dataset_test, info = tfds.load("cifar10", split=tfds.Split.TEST, with_info=True)

# Training pipeline: preprocess -> shuffle -> batch -> prefetch.
dataset_train = (
    dataset_train
    .map(pre_processing_train, num_parallel_calls=4)
    .shuffle(buffer_size=TRAINING_SHUFFLE_BUFFER)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=3)
)

# Test pipeline: same stages minus the shuffle, batched with the training batch size.
dataset_test = (
    dataset_test
    .map(pre_processing_test, num_parallel_calls=4)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=3)
)

# TensorFlow Lite


## Overview ([link](https://www.tensorflow.org/lite/guide/get_started#4_optimize_your_model_optional))

To run a TensorFlow model with TensorFlow Lite, you must first convert it into the TFLite format. You cannot create a model directly in TFLite; you must convert an existing TensorFlow model.

TFLite is designed to execute models efficiently in low-resource settings. Converting a model reduces its file size; further optimizations can increase speed and decrease size, with some tradeoffs.



## TensorFlow Lite Converter

The converter can convert from several sources, including Keras models and SavedModel directories. It converts the model into a `.tflite` file.

In [27]:
# Directory holding the SavedModel export of the mobilenet_v2 implementation.
model_dir = "F://Models/Model_Design/mobilenet_v2/"

# Keras handle on the same model; later cells call and evaluate it directly.
model = tf.keras.models.load_model(model_dir)

# Convert the SavedModel directory into a serialized TFLite model (bytes).
converter = tf.lite.TFLiteConverter.from_saved_model(model_dir)
tflite_model = converter.convert()

# Store the converted model in a "TFLite" folder two levels above the
# SavedModel directory, creating the folder if it does not exist yet.
base_dir = pathlib.Path(model_dir).parents[1].joinpath("TFLite/")
base_dir.mkdir(parents=True, exist_ok=True)
tflite_path = base_dir.joinpath('mobilenet_v2.tflite')
# Last expression of the cell: displays the number of bytes written.
tflite_path.write_bytes(tflite_model)

4129644

The `Interpreter` object can be constructed with either the `model_content=tflite_obj` keyword or the `model_path` keyword. After the interpreter has been instantiated, `allocate_tensors()` allocates the tensors required by the model graph. `get_input_details()` and `get_output_details()` give the shape and index of the input and output tensors; these are used later when running inference.

In [14]:
# Build an interpreter straight from the in-memory converted model and
# allocate its tensors before querying or running it.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

# Descriptions of the model's input and output tensors.
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)

# Shape of the first input tensor; the inference cell uses it to size its
# random test input.
input_shape = input_details[0]["shape"]
print(input_details)

[{'name': 'input_2', 'index': 168, 'shape': array([ 1, 28, 28,  3]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}]


## Running Inference

The __TensorFlow Lite Interpreter__ takes a model file and executes its operations. The interpreter has APIs in many different languages.

In [34]:
# Draw one random float32 sample with the model's input shape and hand it to
# the TFLite interpreter.
input_data = np.random.random_sample(input_shape).astype(np.float32)
input_index = input_details[0]['index']
interpreter.set_tensor(input_index, input_data)

interpreter.invoke()

# `get_tensor()` returns a copy of the tensor data;
# `tensor()` would give a pointer to the tensor instead.
output_index = output_details[0]['index']
tflite_results = interpreter.get_tensor(output_index)

# Run the original TensorFlow model on the very same input.
tf_results = model(input_data)

# Both models must agree to 5 decimal places, checked pair by pair.
for expected, actual in zip(tf_results, tflite_results):
    np.testing.assert_almost_equal(expected, actual, decimal=5)

print(tflite_results)

[[9.2361607e-10 7.8270359e-06 4.3587871e-03 9.3027151e-01 1.0160868e-04
  5.8098314e-05 6.5200336e-02 3.3402384e-10 1.2902692e-07 1.7488577e-06]]


### GPU Acceleration

The TFLite Interpreter can be configured to make use of hardware acceleration on supported devices (for example, a GPU).

## Model Optimization


### Quantization

https://www.tensorflow.org/model_optimization/guide/quantization/post_training

https://www.tensorflow.org/lite/performance/post_training_quantization

https://www.tensorflow.org/lite/performance/post_training_quant
https://www.tensorflow.org/lite/performance/post_training_integer_quant
https://www.tensorflow.org/lite/performance/post_training_float16_quant
https://github.com/tensorflow/tensorflow/tree/r1.14/tensorflow/contrib/quantize
[github tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quant.ipynb)

TFLite allows quantization during model conversion. Activations are always stored in floating point. For ops that support quantized kernels, the activations are dynamically quantized to 8 bits of precision before the op runs and are dequantized back to float after processing.

With post-training quantization, you must test whether the resulting performance degradation is acceptable.

In [18]:
# Baseline: loss and accuracy of the original (unquantized) Keras model on the
# test pipeline.
test_metrics = model.evaluate(x=dataset_test)
test_loss, test_accuracy = test_metrics

    157/Unknown - 5s 32ms/step - loss: 0.2933 - accuracy: 0.9215 5s 31ms/step - loss: 0.2971 - accu

In [32]:
# Total on-disk size of the SavedModel: add up the size of every regular file
# found anywhere below model_dir, then report it in MB.
out = 0
for entry in pathlib.Path(model_dir).glob('**/*'):
    if entry.is_file():
        out += entry.stat().st_size
print(out/1e6, "MB")

16.893877 MB


In [36]:
# Size of the unquantized .tflite file: read in bytes from the filesystem,
# printed in MB.
tflite_stat = tflite_path.stat()
out = tflite_stat.st_size
print(out/1e6, "MB")

4.129644 MB


In [38]:
# Enable post-training quantization on the existing converter and convert again.
# `tf.lite.Optimize.DEFAULT` replaces the deprecated `OPTIMIZE_FOR_SIZE` flag,
# which TensorFlow documents as doing the same thing as DEFAULT.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

# Write the quantized model next to the unquantized one and report its size;
# write_bytes() returns the number of bytes written.
tflite_model_quant_file = base_dir/"mobilenet_v2_quant.tflite"
out = tflite_model_quant_file.write_bytes(tflite_quant_model)
print(out/1e6, "MB")

1.091672 MB


In [None]:
# Interpreter for the unquantized model, loaded from its .tflite file.
interpreter = tf.lite.Interpreter(model_path=str(tflite_path))
interpreter.allocate_tensors()

# Interpreter for the quantized model. Tensors must be allocated on this
# interpreter as well; the original cell called allocate_tensors() on
# `interpreter` a second time and left `interpreter_quant` unallocated.
interpreter_quant = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))
interpreter_quant.allocate_tensors()



### Pruning
https://www.tensorflow.org/model_optimization/guide/pruning/train_sparse_models

https://www.tensorflow.org/lite/performance/post_training_quantization

https://www.tensorflow.org/model_optimization/guide/pruning/train_sparse_models

https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras

https://www.tensorflow.org/lite/guide/hosted_models