In [1]:
#GETTING THE CIFAR DATASET READY FOR EXPERIMENTATION
import tensorflow_datasets as tfds
import tensorflow as     tf
import math
import numpy             as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import *
import pathlib

# Print the installed TensorFlow version so the run can be reproduced later.
print(tf.__version__)

2.1.0


In [2]:
from utils.cifar_functions import *

# PREPARE THE CIFAR DATASET
# Fetch CIFAR-10 through tensorflow_datasets, one call per split.
# NOTE: `info` is bound by both calls, so it ends up holding the value
# returned alongside the TEST split.
dataset_train, info = tfds.load("cifar10", split=tfds.Split.TRAIN, with_info=True)
dataset_test, info = tfds.load("cifar10", split=tfds.Split.TEST, with_info=True)

# Training pipeline: preprocess -> shuffle -> batch -> prefetch.
# `pre_processing_train`, `TRAINING_SHUFFLE_BUFFER` and `TRAINING_BATCH_SIZE`
# are presumably provided by the `utils.cifar_functions` star import -- confirm.
dataset_train = (
    dataset_train
    .map(pre_processing_train, num_parallel_calls=4)
    .shuffle(buffer_size=TRAINING_SHUFFLE_BUFFER)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=3)
)

# Testing pipeline: same steps minus the shuffle, using the test-time preprocessing.
dataset_test = (
    dataset_test
    .map(pre_processing_test, num_parallel_calls=4)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=3)
)

# TensorFlow Lite


## Overview ([link](https://www.tensorflow.org/lite/guide/get_started#4_optimize_your_model_optional))

To use a TensorFlow model with TFLite you must __convert it into TFLite format__. You cannot create a model with TFLite; you must convert an existing TensorFlow model to TFLite.

TFLite is designed to execute models efficiently in low-resource settings. Converting a model reduces its file size; further optimizations can increase speed and decrease size even more, with some tradeoffs.



## TensorFlow Lite Converter

The converter can convert from Keras models and SavedModel directories. It converts the model into a `.tflite` file.

We will be working with an existing TensorFlow model. Specifically we will be using an implementation of MobileNet V2 from [this notebook](https://github.com/harrisonjansma/2020_Notes/blob/master/DL/Implementations/CV/Image_Recognition/0_ResNet_and_MobileNet_V2.ipynb).

In [3]:
#SavedModel from mobilenet_v2 implementation
# NOTE: hard-coded absolute path on the F: drive; change `model_dir` to wherever
# the SavedModel directory lives on your machine.
model_dir = "F://Models/Model_Design/mobilenet_v2/"
# Load the original Keras model; later cells compare it against the TFLite versions.
model = tf.keras.models.load_model(model_dir)

In [4]:
#convert to tflite model
# Build a converter directly from the SavedModel directory; convert() returns the
# TFLite model as bytes (they are written to disk with write_bytes() afterwards).
converter = tf.lite.TFLiteConverter.from_saved_model(model_dir)
tflite_model = converter.convert()

#Create a TFLite subdirectory in F://Models
# i.e. two levels above the SavedModel directory; mkdir is a no-op if it already exists.
base_dir = pathlib.Path(model_dir).parent.parent / "TFLite/"
base_dir.mkdir(parents=True, exist_ok=True)

In [5]:
#save the converted model to .tflite file
tflite_path = base_dir/'mobilenet_v2.tflite'
# write_bytes() returns the number of bytes written, which is the value the cell displays.
tflite_path.write_bytes(tflite_model)

4129644

The `Interpreter` object can be constructed with either the keyword argument `model_content=tflite_obj` or `model_path=path/to/file.tflite`.

After the interpreter has been instantiated, `allocate_tensors()` allocates the tensors required by the model graph. Calling `get_input_details()` / `get_output_details()` gives the shape and index of the input/output tensors. Use these later when running inference.

In [6]:
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path=str(tflite_path))
interpreter.allocate_tensors()

# Get the input and output tensor details (metadata describing the tensors, not their data).
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

#shows the retrieved input data
# Each entry is a dict; its 'shape' and 'index' fields are reused when running inference.
input_shape = input_details[0]['shape']
print(input_details)

[{'name': 'input_2', 'index': 168, 'shape': array([ 1, 28, 28,  3]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}]


## TFLite Inference ([tutorial](https://www.tensorflow.org/lite/guide/inference))

The __TensorFlow Lite Interpreter__ takes a model file and executes its operations. The interpreter has APIs in many different languages and is designed to be lean and fast.

In [7]:
# Draw a random float32 array with the same shape as the model input.
input_data = np.random.random_sample(input_shape).astype(np.float32)
# Copy it into the interpreter's input tensor.
interpreter.set_tensor(input_details[0]['index'], input_data)

# Run inference.
interpreter.invoke()

# `get_tensor()` hands back a copy of the output data;
# `tensor()` would give a pointer to the tensor instead.
tflite_results = interpreter.get_tensor(output_details[0]['index'])

# Run the original TensorFlow model on the very same input.
tf_results = model(input_data)

# Both outputs must agree, row by row, to 5 decimal places.
for tf_result, tflite_result in zip(tf_results, tflite_results):
    np.testing.assert_almost_equal(tf_result, tflite_result, decimal=5)

# Show the first three scores from each model side by side.
print(tflite_results[0,:3])
print(tf_results[0,:3])

[1.3530305e-09 8.1705935e-03 2.9588188e-03]
tf.Tensor([1.3530357e-09 8.1705702e-03 2.9588216e-03], shape=(3,), dtype=float32)


### GPU Acceleration

The TFLite Interpreter can be configured to make use of hardware acceleration.

## Model Optimization


### Quantization

[github tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quant.ipynb)

TFLite allows quantization during model conversion. Activations are always stored in floating point. For ops that support quantized kernels, the activations are dynamically quantized to 8 bits of precision before the op runs and then dequantized back to float after processing.

With post training quantization, you must test to see if performance degradation is acceptable.

__Quantizing an existing model__

In [8]:
# Enable post-training quantization during conversion.
# `tf.lite.Optimize.DEFAULT` is used instead of `OPTIMIZE_FOR_SIZE`: the latter is
# deprecated in later TensorFlow releases, where it is documented as doing the same as DEFAULT.
converter = tf.lite.TFLiteConverter.from_saved_model(model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

In [9]:
# Save the quantized model in the same directory as the unquantized .tflite file.
tflite_model_quant_file = base_dir/"mobilenet_v2_quant.tflite"
# write_bytes() returns the number of bytes written, which is the value the cell displays.
tflite_model_quant_file.write_bytes(tflite_quant_model)

1091672

__Comparing model sizes__ (on disk)

In [10]:
# SavedModel size: add up every regular file found anywhere under the directory.
out = sum(
    entry.stat().st_size
    for entry in pathlib.Path(model_dir).glob('**/*')
    if entry.is_file()
)
print(out/1e6, "MB")

16.893877 MB


In [11]:
# Size on disk of the .tflite file without quantization.
out = tflite_path.stat().st_size
print(f"{out/1e6} MB")

4.129644 MB


In [12]:
# Size on disk of the .tflite file with quantization.
out = tflite_model_quant_file.stat().st_size
print(f"{out/1e6} MB")

1.091672 MB


__Running Inference__

In [13]:
#instantiate the Interpreters for both quantized and unquantized models
# Unquantized model: rebinds `interpreter` to a fresh instance loaded from the same file as before.
interpreter = tf.lite.Interpreter(model_path=str(tflite_path))
interpreter.allocate_tensors()

# Quantized model.
interpreter_quant = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))
interpreter_quant.allocate_tensors()

Make sure that the data that is input is the same shape and datatype as the input tensor of the model. We can check this with `interpreter.get_input_details()`.

In [14]:
# Display the input tensor metadata (name, index, shape, dtype, quantization).
interpreter.get_input_details()

[{'name': 'input_2',
  'index': 168,
  'shape': array([ 1, 28, 28,  3]),
  'dtype': numpy.float32,
  'quantization': (0.0, 0)}]

Currently the dataset we use for testing, `dataset_test`, is full of 28x28 CIFAR-10 images and their corresponding labels. Note that the batch size is greater than 1, so we will have to `.unbatch()` it after we benchmark the original TensorFlow model.

In [15]:
# Peek at the first batch only, to check the shape and dtype of the input data.
for im, lab in dataset_test.take(1):
    print(im.shape)
    print(im.dtype)

(64, 28, 28, 3)
<dtype: 'float32'>


In [16]:
def _to_numpy(value):
    """Return `value` as a NumPy array, using its `.numpy()` method when it has one."""
    return value.numpy() if hasattr(value, "numpy") else np.asarray(value)


def evaluate_model(interpreter, dataset=None):
    """Measure the top-1 accuracy of a TFLite interpreter, one example at a time.

    Args:
        interpreter: A `tf.lite.Interpreter` whose tensors have already been
            allocated. Only its first input and first output tensor are used.
        dataset: Iterable of `(image, label)` pairs, each holding a single
            unbatched example. Defaults to the notebook-level `dataset_test`,
            which must have been unbatched before this function is called.

    Returns:
        float: fraction of examples whose arg-max prediction equals the label.

    Raises:
        ValueError: if `dataset` yields no examples.
    """
    if dataset is None:
        # Backward-compatible default: fall back to the global test set.
        dataset = dataset_test

    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    num_correct = 0
    num_predictions = 0
    for image, label in dataset:
        # Add a leading batch dimension of size 1 before feeding the interpreter.
        batch = np.expand_dims(_to_numpy(image), axis=0)
        interpreter.set_tensor(input_index, batch)

        interpreter.invoke()

        # get_tensor() returns a copy, so no reference into the interpreter's
        # internal buffers is held across invoke() calls.
        output = interpreter.get_tensor(output_index)
        predicted_class = np.argmax(output[0])

        if predicted_class == _to_numpy(label):
            num_correct += 1
        num_predictions += 1

    if num_predictions == 0:
        raise ValueError("evaluate_model received an empty dataset")
    return num_correct / num_predictions

In [17]:
#unquantized model performance
# Baseline: loss and accuracy of the original Keras model on the (still batched) test set.
test_loss, test_accuracy = model.evaluate(x=dataset_test)

    157/Unknown - 5s 32ms/step - loss: 0.2933 - accuracy: 0.9215

In [18]:
#will only run inference one image at a time
#with the tflite interpreter
# NOTE: this rebinds `dataset_test`, so the cell is not idempotent --
# running it a second time would unbatch the already-unbatched dataset.
dataset_test = dataset_test.unbatch()

In [19]:
# Accuracy of the unquantized TFLite model on the unbatched test set.
evaluate_model(interpreter)

0.9215

In [20]:
# Accuracy is almost unchanged (0.9215 -> 0.9212), but this seemed to take about 3x as long
# (timing was not measured in this notebook)?
evaluate_model(interpreter_quant)

0.9212

### Pruning
https://www.tensorflow.org/model_optimization/guide/pruning/train_sparse_models

https://www.tensorflow.org/lite/performance/post_training_quantization

https://www.tensorflow.org/model_optimization/guide/pruning/train_sparse_models

https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras

https://www.tensorflow.org/lite/guide/hosted_models