### Import Package

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

import traceback
import contextlib

import pathlib

### Load Dataset

In [3]:
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print("Train Image shape:", X_train.shape, "Test Image shape:", X_test.shape)

Train Image shape: (60000, 28, 28) Test Image shape: (10000, 28, 28)


In [4]:
# Normalize the images
X_train = X_train / 255.0
X_test = X_test / 255.0

### Conv2D Base Model

In [5]:
model = keras.Sequential([
  keras.layers.InputLayer(input_shape=(28, 28)),
  keras.layers.Reshape(target_shape=(28, 28, 1)),
  keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
  keras.layers.MaxPooling2D(pool_size=(2, 2)),
  keras.layers.Flatten(),
  keras.layers.Dense(10)
])


# Model summary
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 12)        120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 12)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2028)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                20290     
Total params: 20,410
Trainable params: 20,410
Non-trainable params: 0
_________________________________________________________________


### Train Conv2D Base Model

In [6]:
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [7]:
model.fit(X_train,
         y_train,
         batch_size=64,
         epochs=10,
         validation_data=(X_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f73fd99e128>

In [8]:
# Saving Model
model.save('1_mnist_model.h5')

In [9]:
# Evaluate the model on test set
score = model.evaluate(X_test, y_test, verbose=0)

# Print test accuracy
print('\n', 'Test accuracy:', score[1])


 Test accuracy: 0.9804


### Train model with pruning

In [10]:
! pip install -q tensorflow-model-optimization

You should consider upgrading via the '/home/db/.virtualenvs/LR/bin/python3 -m pip install --upgrade pip' command.[0m


In [11]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 128
epochs = 40
validation_split = 0.1 # 10% of training set will be used for validation set. 

num_images = X_train.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Define model for pruning.
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning.summary()

Instructions for updating:
Please use `layer.add_weight` method instead.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
prune_low_magnitude_reshape  (None, 28, 28, 1)         1         
_________________________________________________________________
prune_low_magnitude_conv2d ( (None, 26, 26, 12)        230       
_________________________________________________________________
prune_low_magnitude_max_pool (None, 13, 13, 12)        1         
_________________________________________________________________
prune_low_magnitude_flatten  (None, 2028)              1         
_________________________________________________________________
prune_low_magnitude_dense (P (None, 10)                40572     
Total params: 40,805
Trainable params: 20,410
Non-trainable params: 20,395
_________________________________________________________________


In [12]:
X_train.shape

(60000, 28, 28)

In [13]:
y_train.shape

(60000,)

In [14]:
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print("Train Image shape:", X_train.shape, "Test Image shape:", X_test.shape)

Train Image shape: (60000, 28, 28) Test Image shape: (10000, 28, 28)


In [15]:
# Normalize the images
X_train = X_train / 255.0
X_test = X_test / 255.0

In [16]:

callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir='log'),
]

model_for_pruning.fit(X_train, y_train,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)

Train on 54000 samples, validate on 6000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f739c1b9518>

In [17]:
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   X_train, y_train, verbose=0)

print('Pruned test accuracy:', model_for_pruning_accuracy)

Pruned test accuracy: 0.9910333


In [18]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

tf.keras.models.save_model(model_for_export, '2_mnist_model_pruning.h5', include_optimizer=False)

### Q-aware Training

In [19]:
import tensorflow_model_optimization as tfmot

quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_model.summary()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and atta

In [20]:
# Train and evaluate the model against baseline

train_images_subset = X_train[0:1000] # out of 60000
train_labels_subset = y_train[0:1000]

q_aware_model.fit(train_images_subset, train_labels_subset,
                  batch_size=10, epochs=50, validation_split=0.1)

Train on 900 samples, validate on 100 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f746a0c7908>

In [21]:
# Evaluate the model on test set
score = q_aware_model.evaluate(X_test, y_test, verbose=0)

# Print test accuracy
print('\n', 'Test accuracy:', score[1])


 Test accuracy: 0.9711


In [22]:
q_aware_model.save('3_mnist_model_qaware.h5')

### Convert Model to TFLite

In [23]:
def ConvertTFLite(model_path, filename):
  try:
    # Loading Model
    model = tf.keras.models.load_model(model_path)
    # Converter
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()
    #Specify path
    tflite_models_dir = pathlib.Path("tflite_models/")
    tflite_models_dir.mkdir(exist_ok=True, parents=True)
    filename = filename+".tflite"
    tflite_model_file = tflite_models_dir/filename
    # Save Model
    tflite_model_file.write_bytes(tflite_model)

    return f'Converted to TFLite, path {tflite_model_file}'
  except Exception as e:
    return str(e)

In [24]:
ConvertTFLite('./1_mnist_model.h5','4_mnist_model')

'Converted to TFLite, path tflite_models/4_mnist_model.tflite'

In [25]:
ConvertTFLite('./2_mnist_model_pruning.h5','5_mnist_pruning_model')



'Converted to TFLite, path tflite_models/5_mnist_pruning_model.tflite'

In [26]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
quantized_tflite_model = converter.convert()

quantized_aware_tflite_file = '6_mnist_model_qaware.tflite'

with open(quantized_aware_tflite_file, 'wb') as f:
  f.write(quantized_tflite_model)

print('Saved quvantaised aware TFLite model to:', quantized_aware_tflite_file)

Saved quvantaised aware TFLite model to: 6_mnist_model_qaware.tflite


### Integer with Float fallback quantaization

In [9]:
def Quant_int_with_float(model_name, filename):
  try:
    model = tf.keras.models.load_model(model_name)
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model_quant = converter.convert()
    filename = filename+'.tflite'
    tflite_models_dir = pathlib.Path("tflite_models/")
    tflite_models_dir.mkdir(exist_ok=True, parents=True)
    
    tflite_model_quant_file = tflite_models_dir/filename
    tflite_model_quant_file.write_bytes(tflite_model_quant)

    return f'Converted - path {tflite_model_quant_file}'
  
  except Exception as e:
    return str(e)

In [10]:
Quant_int_with_float('./1_mnist_model.h5', '7_mnist_Integer_float_model')

'Converted - path tflite_models/7_mnist_Integer_float_model.tflite'

In [11]:
Quant_int_with_float('./2_mnist_model_pruning.h5','8_mnist_pruning_Integer_float_model')



'Converted - path tflite_models/8_mnist_pruning_Integer_float_model.tflite'

In [30]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
mnist_train, _ = tf.keras.datasets.mnist.load_data()
images = tf.cast(mnist_train[0], tf.float32) / 255.0
mnist_ds = tf.data.Dataset.from_tensor_slices((images)).batch(1)
def representative_data_gen():
  for input_value in mnist_ds.take(100):
    yield [input_value]

converter.representative_dataset = representative_data_gen

quantized_tflite_model = converter.convert()
tflite_models_dir = pathlib.Path("tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_quant_file = tflite_models_dir/"9_mnist_Qaware_Integer_float_model.tflite"
tflite_model_quant_file.write_bytes(quantized_tflite_model)

24064

### Float 16 Quantization

In [31]:
def Quant_float(model_name, filename):
  try:
    model = tf.keras.models.load_model(model_name)
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_types = [tf.float16]

    tflite_fp16_model = converter.convert()
    filename = filename+'.tflite'
    tflite_models_fp16_dir = pathlib.Path("tflite_models/")
    tflite_models_fp16_dir.mkdir(exist_ok=True, parents=True)
    
    tflite_model_fp16_file = tflite_models_fp16_dir/filename
    tflite_model_fp16_file.write_bytes(tflite_fp16_model)

    return f'Converted - path {tflite_model_fp16_file}'
  
  except Exception as e:
    return str(e)

In [32]:
Quant_float('./1_mnist_model.h5', '10_mnist_float16_model')

'Converted - path tflite_models/10_mnist_float16_model.tflite'

In [33]:
Quant_float('./2_mnist_model_pruning.h5', '11_mnist_float_pruning_model')



'Converted - path tflite_models/11_mnist_float_pruning_model.tflite'

In [34]:
Quant_float('./mnist_model_sperable.h5','mnist_sperable_float_model')

'SavedModel file does not exist at: ./mnist_model_sperable.h5/{saved_model.pbtxt|saved_model.pb}'

In [35]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

tflite_fp16_model = converter.convert()
tflite_model_fp16_file = tflite_models_dir/"12_mnist_Qaware_float16_model.tflite"
tflite_model_fp16_file.write_bytes(tflite_fp16_model)

43568

### Integer Only

In [36]:
def Quant_integer(model_name, filename):
  try:
    model = tf.keras.models.load_model(model_name)
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    mnist_train, _ = tf.keras.datasets.mnist.load_data()
    images = tf.cast(mnist_train[0], tf.float32) / 255.0
    mnist_ds = tf.data.Dataset.from_tensor_slices((images)).batch(1)
    def representative_data_gen():
      for input_value in mnist_ds.take(100):
        yield [input_value]

    converter.representative_dataset = representative_data_gen

    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8  # or tf.uint8
    converter.inference_output_type = tf.int8  # or tf.uint8

    tflite_int_quant_model = converter.convert()

    filename = filename+'.tflite'
    tflite_models_dir = pathlib.Path("tflite_models/")
    tflite_models_dir.mkdir(exist_ok=True, parents=True)
    
    tflite_model_integeronly_file = tflite_models_dir/filename
    tflite_model_integeronly_file.write_bytes(tflite_int_quant_model)

    return f'Converted - path {tflite_model_integeronly_file}'
  
  except Exception as e:
    return str(e)

In [37]:
Quant_integer('./1_mnist_model.h5', '13_mnist_integeronly_model')

'Converted - path tflite_models/13_fashion_mnist_integeronly_model.tflite'

In [38]:
Quant_integer('./2_mnist_model_pruning.h5', '14_mnist_Integeronly_pruning_model')



'Converted - path tflite_models/14_mnist_Integeronly_pruning_model.tflite'

In [39]:
Quant_integer('3_mnist_model_qaware.h5','15_mnist_qaware_integer_model')

'Unknown layer: QuantizeWrapper'

In [40]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
mnist_train, _ = tf.keras.datasets.mnist.load_data()
images = tf.cast(mnist_train[0], tf.float32) / 255.0
mnist_ds = tf.data.Dataset.from_tensor_slices((images)).batch(1)
def representative_data_gen():
  for input_value in mnist_ds.take(100):
    yield [input_value]

converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

tflite_int_quant_model = converter.convert()

filename = filename+'.tflite'
tflite_models_dir = pathlib.Path("tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
    
tflite_model_integeronly_file = tflite_models_dir/filename
tflite_model_integeronly_file.write_bytes(tflite_int_quant_model)

RuntimeError: Quantization not yet supported for op: FAKE_QUANT

### Evalvate Model

In [5]:
import time

### Keras model Evaluation

In [41]:
def evaluate_keras_model_single_unit(model_path):
  start_time_infer = time.time()
  model = tf.keras.models.load_model(model_path, compile = True)
  model.compile(optimizer='adam',
           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
           metrics=['accuracy'])
  data = X_test[0]
  data = data.reshape((1, 28, 28))
  data_y = y_test[0:1]
  score = model.evaluate(data, data_y, verbose=0)

  result = {'Time to single unit infer': (time.time() - start_time_infer),
            'Score' : score[1]}

  return result

In [43]:
evaluate_keras_model_single_unit('./1_mnist_model.h5')

{'Time to single unit infer': 0.34443020820617676, 'Score': 1.0}

In [44]:
evaluate_keras_model_single_unit('./2_mnist_model_pruning.h5')



{'Time to single unit infer': 0.18784451484680176, 'Score': 1.0}

In [38]:
def evaluate_keras_model_test_set(model_path):
  start_time_infer = time.time()
  model = tf.keras.models.load_model(model_path, compile = True)
  model.compile(optimizer='adam',
           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
           metrics=['accuracy'])
  score = score = model.evaluate(X_test, y_test, verbose =0)

  result = {'Time to infer for the whole test set': (time.time() - start_time_infer),
            'Score' : score[1]}

  return result

In [39]:
evaluate_keras_model_test_set('./1_mnist_model.h5')

{'Time to infer for the whole test set': 1.2550950050354004, 'Score': 0.9804}

In [40]:
evaluate_keras_model_test_set('./2_mnist_model_pruning.h5')



{'Time to infer for the whole test set': 1.0744516849517822, 'Score': 0.9784}

### TF Lite Model Evaluvation

In [7]:
# Evaluate the mode
def evaluate_tflite_model_test_set(interpreter):
  start_time = time.time()

  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the "test" dataset.
  prediction_digits = []
  for test_image in X_test:
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)
  
    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  # Compare prediction results with ground truth labels to calculate accuracy.
  accurate_count = 0
  for index in range(len(prediction_digits)):
    if prediction_digits[index] == y_test[index]:
      accurate_count += 1
  accuracy = accurate_count * 1.0 / len(prediction_digits)

  results = {'time': (time.time() - start_time),
             'accuracy': accuracy}

  return results

### TF Lite Models

In [16]:
# TF Lite
tflite_model_file = 'tflite_models/4_mnist_model.tflite'
interpreter = tf.lite.Interpreter(model_path=str(tflite_model_file))
interpreter.allocate_tensors()
evaluate_tflite_model_test_set(interpreter)

{'time': 1.035888671875, 'accuracy': 0.9804}

In [17]:
# Purning TF Lite 
tflite_pruning_model_file = 'tflite_models/5_mnist_pruning_model.tflite'
interpreter_pruning = tf.lite.Interpreter(model_path=str(tflite_pruning_model_file))
interpreter_pruning.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_pruning)

{'time': 1.0091731548309326, 'accuracy': 0.9784}

In [19]:
# Qaware Model
tflite_model_file = 'tflite_models/6_mnist_model_qaware.tflite'
interpreter_qaware = tf.lite.Interpreter(model_path=str(tflite_model_file))
interpreter_qaware.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_qaware)

{'time': 1.4142177104949951, 'accuracy': 0.9711}

### Integer Float TF Lite models

In [20]:
# TF Lite
tflite_model_file = 'tflite_models/7_mnist_Integer_float_model.tflite'
interpreter_int_float = tf.lite.Interpreter(model_path=str(tflite_model_file))
interpreter_int_float.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_int_float)

{'time': 1.0922648906707764, 'accuracy': 0.9803}

In [21]:
# Purning TF Lite 
tflite_pruning_model_file = 'tflite_models/8_mnist_pruning_Integer_float_model.tflite'
interpreter_int_float_pruning = tf.lite.Interpreter(model_path=str(tflite_pruning_model_file))
interpreter_int_float_pruning.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_int_float_pruning)

{'time': 1.0958216190338135, 'accuracy': 0.9785}

In [50]:
# Q-aware TF Lite 
tflite_qaware_model_file = 'tflite_models/9_mnist_Qaware_Integer_float_model.tflite'
interpreter_tflite_qaware_intfloat = tf.lite.Interpreter(model_path=str(tflite_qaware_model_file))
interpreter_tflite_qaware_intfloat.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_tflite_qaware_intfloat)

{'time': 2.127317190170288, 'accuracy': 0.971}

### Float Tflite

In [23]:
# TF Lite
tflite_model_file = 'tflite_models/10_mnist_float16_model.tflite'
interpreter_float = tf.lite.Interpreter(model_path=str(tflite_model_file))
interpreter_float.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_float)

{'time': 1.0245018005371094, 'accuracy': 0.9804}

In [24]:
# Purning TF Lite 
tflite_pruning_model_file = 'tflite_models/11_mnist_float_pruning_model.tflite'
interpreter_float_pruning = tf.lite.Interpreter(model_path=str(tflite_pruning_model_file))
interpreter_float_pruning.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_float_pruning)

{'time': 1.0211999416351318, 'accuracy': 0.9784}

In [47]:
tflite_qaware_model_file = 'tflite_models/12_mnist_Qaware_float16_model.tflite'
interpreter_tflite_qaware_float16 = tf.lite.Interpreter(model_path=str(tflite_qaware_model_file))
interpreter_tflite_qaware_float16.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_tflite_qaware_float16)

{'time': 1.4793415069580078, 'accuracy': 0.9711}

### Integer Only TFlite

In [26]:
# TF Lite
tflite_model_file = 'tflite_models/13_mnist_integeronly_model.tflite'
interpreter_int = tf.lite.Interpreter(model_path=str(tflite_model_file))
interpreter_int.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_int)

{'time': 1.6056203842163086, 'accuracy': 0.9805}

In [27]:
# Purning TF Lite 
tflite_pruning_model_file = 'tflite_models/14_mnist_Integeronly_pruning_model.tflite'
interpreter_int_pruning = tf.lite.Interpreter(model_path=str(tflite_pruning_model_file))
interpreter_int_pruning.allocate_tensors()
evaluate_tflite_model_test_set(interpreter_int_pruning)

{'time': 1.6063134670257568, 'accuracy': 0.9785}

### Single unit Evaluate

In [28]:
# Evaluate the mode
def evaluate_tflite_model_single_unit(interpreter):
  start_time = time.time()

  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  test_image = np.expand_dims(X_test[0], axis=0).astype(np.float32)
  interpreter.set_tensor(input_index, test_image)
  
  # Run inference.
  interpreter.invoke()

  # Post-processing: remove batch dimension and find the digit with highest
  # probability.
  output = interpreter.tensor(output_index)

  results = {'time': (time.time() - start_time)}

  return results

In [29]:
# TF Lite
evaluate_tflite_model_single_unit(interpreter)

{'time': 0.0003230571746826172}

In [30]:
evaluate_tflite_model_single_unit(interpreter_pruning)

{'time': 0.0006458759307861328}

In [43]:
evaluate_tflite_model_single_unit(interpreter_int_float)

{'time': 0.0003819465637207031}

In [32]:
evaluate_tflite_model_single_unit(interpreter_qaware)

{'time': 0.00030803680419921875}

In [33]:
evaluate_tflite_model_single_unit(interpreter_int_float_pruning)

{'time': 0.0002536773681640625}

In [34]:
evaluate_tflite_model_single_unit(interpreter_float)

{'time': 0.0006232261657714844}

In [53]:
evaluate_tflite_model_single_unit(interpreter_float_pruning)

{'time': 0.0003516674041748047}

In [36]:
evaluate_tflite_model_single_unit(interpreter_int)

{'time': 0.0005064010620117188}

In [55]:
evaluate_tflite_model_single_unit(interpreter_int_pruning)

{'time': 0.00037598609924316406}

In [51]:
evaluate_tflite_model_single_unit(interpreter_tflite_qaware_intfloat)

{'time': 0.0005180835723876953}

In [49]:
evaluate_tflite_model_single_unit(interpreter_tflite_qaware_float16)

{'time': 0.0003058910369873047}