# 03_aware_training
* [1. Train a basic model for Cifar10](#1.-Train-a-basic-model-for-Cifar10)
* [2. Optimization](#2.-Optimization)
     - [2.1 Quantization](#2.1-Quantization)
     - [2.2 Pruning](#2.2-Pruning)
     - [2.3 Clustering](#2.3-Clustering)

In [1]:
import os
import tempfile
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Activation, Flatten, Conv2D, MaxPooling2D, Reshape)
from tensorflow.keras.optimizers import Adam

## 1. Train a basic model for Cifar10

In [3]:
# cifar10 中有將 data 先分為 train 和 test
cifar10 = keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

print('x_train shape:', x_train.shape)
print('y_train.shape:', y_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# x_train.shape: 四個維度：第 1 維度為筆數、第 2, 3 維度為影像大小 32*32、第 4 維度是 RGB 三原色，所以是 3
# x_train 中有 50000 筆訓練資料，以及 x_test 中有 10000 筆的測試資料

x_train shape: (50000, 32, 32, 3)
y_train.shape: (50000, 1)
50000 train samples
10000 test samples


In [4]:
# 記得轉成 'float32'
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# 將 features (照片影像特徵值) 標準化，可以提高模型預測的準確度，並且更快收斂
x_train /= 255  # rescaling
x_test /= 255   # rescaling

In [5]:
# 將訓練資料與測試資料的 label，進行 Onehot encoding 轉換
num_classes = 10
# y_train = keras.utils.to_categorical(y_train, num_classes)
# y_test = keras.utils.to_categorical(y_test, num_classes)

y_train = np.eye(num_classes, dtype='float32')[y_train[:, 0]]
y_test = np.eye(num_classes, dtype='float32')[y_test[:, 0]]

print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
print('y_test.argmax(1) shape:', y_test.argmax(1).shape)

y_train shape: (50000, 10)
y_test shape: (10000, 10)
y_test.argmax(1) shape: (10000,)


In [6]:
# 選擇 Keras 的 API 寫法
inputs = Input(shape=x_train.shape[1:])

# 第一層
# 建立卷積層，設定32個3*3的filters
# 設定ReLU為激活函數。
x = Conv2D(32, (3, 3), activation='relu')(inputs)

# 第二層 - 卷積層 + 池化層
x = Conv2D(32, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# 第三層 - 卷積層
x = Conv2D(64, (3, 3), activation='relu')(x)

# 第四層 - 卷積層 + 池化層
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# 建立分類模型 (MLP) : 平坦層 + 輸出層 (10)
x = Flatten()(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

In [7]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0     

In [8]:
batch_size = 64
num_epochs = 20
learning_rate = 1e-3
optimizer = Adam(lr=learning_rate)

model.compile(optimizer=optimizer,
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0     

In [9]:
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=num_epochs,
                    validation_data=(x_test, y_test),
                    verbose=2)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
50000/50000 - 5s - loss: 2.1245 - accuracy: 0.3247 - val_loss: 2.0897 - val_accuracy: 0.3630
Epoch 2/20
50000/50000 - 3s - loss: 2.0091 - accuracy: 0.4462 - val_loss: 1.9548 - val_accuracy: 0.5024
Epoch 3/20
50000/50000 - 3s - loss: 1.9443 - accuracy: 0.5139 - val_loss: 1.9266 - val_accuracy: 0.5303
Epoch 4/20
50000/50000 - 3s - loss: 1.8940 - accuracy: 0.5651 - val_loss: 1.8819 - val_accuracy: 0.5762
Epoch 5/20
50000/50000 - 3s - loss: 1.8581 - accuracy: 0.6005 - val_loss: 1.8559 - val_accuracy: 0.6039
Epoch 6/20
50000/50000 - 3s - loss: 1.8298 - accuracy: 0.6297 - val_loss: 1.8454 - val_accuracy: 0.6123
Epoch 7/20
50000/50000 - 3s - loss: 1.8140 - accuracy: 0.6448 - val_loss: 1.8372 - val_accuracy: 0.6233
Epoch 8/20
50000/50000 - 3s - loss: 1.7927 - accuracy: 0.6673 - val_loss: 1.8238 - val_accuracy: 0.6343
Epoch 9/20
50000/50000 - 3s - loss: 1.7780 - accuracy: 0.6821 - val_loss: 1.8215 - val_accuracy: 0.6355
Epoch 10/20
50

In [10]:
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)

_, keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

Baseline test accuracy: 0.7065
Saved baseline model to: /tmp/tmpxew3o8lm.h5


# 2. Optimization

In [11]:
!pip install tensorflow_model_optimization

Collecting tensorflow_model_optimization
  Using cached https://files.pythonhosted.org/packages/55/38/4fd48ea1bfcb0b6e36d949025200426fe9c3a8bfae029f0973d85518fa5a/tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl
Collecting dm-tree~=0.1.1
  Using cached https://files.pythonhosted.org/packages/6b/d9/6d88e8d32bb454c4ef8f50c62714b0eb20170f4c1d2cd316e0d99755405e/dm_tree-0.1.5-cp37-cp37m-manylinux1_x86_64.whl
Installing collected packages: dm-tree, tensorflow-model-optimization
Successfully installed dm-tree-0.1.5 tensorflow-model-optimization-0.5.0


In [12]:
import tensorflow_model_optimization as tfmot

In [13]:
def evaluate_model(interpreter, x_test, y_test):
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    # Run predictions on every image in the "test" dataset.
    prediction_digits = []
    for i, test_image in enumerate(x_test):
        test_image = np.expand_dims(test_image, axis=0).astype('float32')
        interpreter.set_tensor(input_index, test_image)

        # Run inference.
        interpreter.invoke()

        # probability.
        output = interpreter.tensor(output_index)
        digit = np.argmax(output()[0])
        prediction_digits.append(digit)

    # Compare prediction results with ground truth labels to calculate accuracy.
    prediction_digits = np.array(prediction_digits)
    accuracy = (prediction_digits == y_test.argmax(-1)).mean()
    return accuracy

In [14]:
def get_gzipped_model_size(file):
    # Returns size of gzipped model, in bytes.
    import os
    import zipfile

    _, zipped_file = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(file)

    return os.path.getsize(zipped_file)

## 2.1 Quantization

In [15]:
quantize_model = tfmot.quantization.keras.quantize_model
quantized_model = quantize_model(model)

In [16]:
# 'quantize_model' requires a recompile.
quantized_model.compile(optimizer='adam',
                        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                        metrics=['accuracy'])

quantized_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
quantize_layer (QuantizeLaye (None, 32, 32, 3)         3         
_________________________________________________________________
quant_conv2d (QuantizeWrappe (None, 30, 30, 32)        963       
_________________________________________________________________
quant_conv2d_1 (QuantizeWrap (None, 28, 28, 32)        9315      
_________________________________________________________________
quant_max_pooling2d (Quantiz (None, 14, 14, 32)        1         
_________________________________________________________________
quant_conv2d_2 (QuantizeWrap (None, 12, 12, 64)        18627     
_________________________________________________________________
quant_conv2d_3 (QuantizeWrap (None, 10, 10, 64)        37059 

In [18]:
quantized_model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=3,
                    validation_data=(x_test, y_test))

Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f33a1da5400>

In [19]:
_, quantized_model_accuracy = quantized_model.evaluate(
    x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Quant test accuracy:', quantized_model_accuracy)

Baseline test accuracy: 0.7065
Quant test accuracy: 0.7037


In [20]:
_, quantized_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(quantized_model, quantized_keras_file, include_optimizer=False)
print('Saved quantized Keras model to:', quantized_keras_file)

Saved quantized Keras model to: /tmp/tmpa669484e.h5


In [21]:
converter = tf.lite.TFLiteConverter.from_keras_model(quantized_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

_, quantized_tflite_file = tempfile.mkstemp('.tflite')
with open(quantized_tflite_file, 'wb') as f:
    f.write(quantized_tflite_model)
    
print('Saved quantized tflite model to:', quantized_tflite_file)

Saved quantized tflite model to: /tmp/tmpvsnhhdmd.tflite


In [22]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped quantized Keras model: %.2f bytes" % (get_gzipped_model_size(quantized_keras_file)))
print("Size of gzipped quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_tflite_file)))
print(f"see {get_gzipped_model_size(keras_file)/get_gzipped_model_size(quantized_tflite_file)}x smaller model from quantization")

Size of gzipped baseline Keras model: 307417.00 bytes
Size of gzipped quantized Keras model: 311219.00 bytes
Size of gzipped quantized TFlite model: 72155.00 bytes
see 4.260508627260758x smaller model from quantization


### See persistence of accuracy from TF to TFLite

In [23]:
interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter, x_test, y_test)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Quant TF test accuracy:', quantized_model_accuracy)
print('Quant TFLite test_accuracy:', test_accuracy)

Baseline test accuracy: 0.7065
Quant TF test accuracy: 0.7037
Quant TFLite test_accuracy: 0.7059


In [24]:
# Save the quantized model.
with open('tflite_model/mobilenet_aware_quant.tflite', 'wb') as f:
    f.write(quantized_tflite_model)

## 2.2 Pruning

In [25]:
# Compute end step to finish pruning after 3 epochs.
epochs = 3
num_images = x_train.shape[0]
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

In [26]:
# Define model for pruning.
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                             final_sparsity=0.80, 
                                                             begin_step=0,
                                                             end_step=end_step)
}

In [27]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
pruned_model = prune_low_magnitude(model, **pruning_params)

Instructions for updating:
Please use `layer.add_weight` method instead.


In [28]:
# 'prune_low_magnitude' requires a recompile.
pruned_model.compile(optimizer='adam',
                     loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                     metrics=['accuracy'])

pruned_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
prune_low_magnitude_conv2d ( (None, 30, 30, 32)        1762      
_________________________________________________________________
prune_low_magnitude_conv2d_1 (None, 28, 28, 32)        18466     
_________________________________________________________________
prune_low_magnitude_max_pool (None, 14, 14, 32)        1         
_________________________________________________________________
prune_low_magnitude_conv2d_2 (None, 12, 12, 64)        36930     
_________________________________________________________________
prune_low_magnitude_conv2d_3 (None, 10, 10, 64)        73794     
_________________________________________________________________
prune_low_magnitude_max_pool (None, 5, 5, 64)          1     

In [29]:
logdir = tempfile.mkdtemp()

callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
  
pruned_model.fit(x_train, y_train,
                 batch_size=batch_size, 
                 epochs=epochs, 
                 validation_data=(x_test, y_test),
                 callbacks=callbacks)

Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f32d842aef0>

For this example, there is minimal loss in test accuracy after pruning, compared to the baseline.

In [30]:
_, pruned_model_accuracy = pruned_model.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned test accuracy:', pruned_model_accuracy)

Baseline test accuracy: 0.7065
Pruned test accuracy: 0.7187


In [31]:
model_for_export = tfmot.sparsity.keras.strip_pruning(pruned_model)

_, pruned_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file)

Saved pruned Keras model to: /tmp/tmpkxpi6fn1.h5


In [32]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

_, pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file, 'wb') as f:
    f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)

Saved pruned TFLite model to: /tmp/tmpcie5r32q.tflite


In [33]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned Keras model: %.2f bytes" % (get_gzipped_model_size(pruned_keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file)))
print(f"see {get_gzipped_model_size(keras_file)/get_gzipped_model_size(pruned_tflite_file)}x smaller model from pruning")

Size of gzipped baseline Keras model: 307417.00 bytes
Size of gzipped pruned Keras model: 99917.00 bytes
Size of gzipped pruned TFlite model: 96992.00 bytes
see 3.169508825470142x smaller model from pruning


In [34]:
# Save the pruned model.
with open('tflite_model/mobilenet_aware_pruned.tflite', 'wb') as f:
    f.write(pruned_tflite_model)

### Create a smaller model from combining pruning and quantization

In [35]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)

print('Saved quantized and pruned TFLite model to:', quantized_and_pruned_tflite_file)

Saved quantized and pruned TFLite model to: /tmp/tmpc9_nur2g.tflite


In [36]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped quantized and pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))
print(f"see {get_gzipped_model_size(keras_file)/get_gzipped_model_size(quantized_and_pruned_tflite_file)}x smaller model from pruning adn quantization")

Size of gzipped baseline Keras model: 307417.00 bytes
Size of gzipped quantized and pruned TFlite model: 29974.00 bytes
see 10.256121972376059x smaller model from pruning adn quantization


In [37]:
# Save the pruned and quantized model.
with open('tflite_model/mobilenet_aware_quantxpruned.tflite', 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)

### See persistence of accuracy from TF to TFLite

In [38]:
interpreter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter, x_test, y_test)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Pruned TF test accuracy:', pruned_model_accuracy)
print('Pruned and quantized TFLite test_accuracy:', test_accuracy)

Baseline test accuracy: 0.7065
Pruned TF test accuracy: 0.7187
Pruned and quantized TFLite test_accuracy: 0.7194


## 2.3 Clustering

In [39]:
# Define model for clustering.
clustering_params = {
  'number_of_clusters': 16,
  'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.LINEAR
}

In [40]:
cluster_weights = tfmot.clustering.keras.cluster_weights
clustered_model = cluster_weights(model, **clustering_params)

In [41]:
# Use smaller learning rate for fine-tuning clustered model
learning_rate = 1e-5
optimizer = tf.keras.optimizers.Adam(lr=learning_rate)

clustered_model.compile(optimizer=optimizer,
                        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                        metrics=['accuracy'])

clustered_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
cluster_conv2d (ClusterWeigh (None, 30, 30, 32)        912       
_________________________________________________________________
cluster_conv2d_1 (ClusterWei (None, 28, 28, 32)        9264      
_________________________________________________________________
cluster_max_pooling2d (Clust (None, 14, 14, 32)        0         
_________________________________________________________________
cluster_conv2d_2 (ClusterWei (None, 12, 12, 64)        18512     
_________________________________________________________________
cluster_conv2d_3 (ClusterWei (None, 10, 10, 64)        36944     
_________________________________________________________________
cluster_max_pooling2d_1 (Clu (None, 5, 5, 64)          0     

In [42]:
clustered_model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=3,
                    validation_data=(x_test, y_test))

Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f32780bd860>

In [43]:
_, clustered_model_accuracy = clustered_model.evaluate(
  x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Clustered test accuracy:', clustered_model_accuracy)

Baseline test accuracy: 0.7065
Clustered test accuracy: 0.6181


In [44]:
model_for_export = tfmot.clustering.keras.strip_clustering(clustered_model)

_, clustered_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, clustered_keras_file, include_optimizer=False)
print('Saved clustered keras model to:', clustered_keras_file)

Saved clustered keras model to: /tmp/tmpx_rfzbhf.h5


In [45]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
clustered_tflite_model = converter.convert()

_, clustered_tflite_file = tempfile.mkstemp('.tflite')

with open(clustered_tflite_file, 'wb') as f:
    f.write(clustered_tflite_model)

print('Saved clustered TFLite model to:', clustered_tflite_file)

Saved clustered TFLite model to: /tmp/tmpyd8vmnrt.tflite


In [46]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped clustered Keras model: %.2f bytes" % (get_gzipped_model_size(clustered_keras_file)))
print("Size of gzipped clustered TFlite model: %.2f bytes" % (get_gzipped_model_size(clustered_tflite_file)))
print(f"see {get_gzipped_model_size(keras_file)/get_gzipped_model_size(clustered_tflite_file)}x smaller model from clustering")

Size of gzipped baseline Keras model: 307417.00 bytes
Size of gzipped clustered Keras model: 29958.00 bytes
Size of gzipped clustered TFlite model: 27805.00 bytes
see 11.05617694659234x smaller model from clustering


In [47]:
# Save the clustered model.
with open('tflite_model/mobilenet_aware_clustered.tflite', 'wb') as f:
    f.write(clustered_tflite_model)

### Create a smaller model from combining clustering and quantization

In [48]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_clustered_tflite_model = converter.convert()

_, quantized_and_clustered_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_clustered_tflite_file, 'wb') as f:
    f.write(quantized_and_clustered_tflite_model)

print('Saved quantized and clustered TFLite model to:', quantized_and_clustered_tflite_file)

Saved quantized and clustered TFLite model to: /tmp/tmprgl8q3ak.tflite


In [49]:
print(f"Size of gzipped baseline Keras model: {get_gzipped_model_size(keras_file):.2f} bytes")
print(f"Size of gzipped quantized and clustered TFlite model: {get_gzipped_model_size(quantized_and_clustered_tflite_file):.2f} bytes")
print(f"see {get_gzipped_model_size(keras_file)/get_gzipped_model_size(quantized_and_clustered_tflite_file)}x smaller model from clustering and quantization")

Size of gzipped baseline Keras model: 307417.00 bytes
Size of gzipped quantized and clustered TFlite model: 21243.00 bytes
see 14.47144941863202x smaller model from clustering and quantization


In [50]:
# Save the pruned and quantized model.
with open('tflite_model/mobilenet_aware_quantxclustered.tflite', 'wb') as f:
    f.write(quantized_and_clustered_tflite_model)

### See persistence of accuracy from TF to TFLite

In [51]:
interpreter = tf.lite.Interpreter(model_content=quantized_and_clustered_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter, x_test, y_test)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Clustered TF test accuracy:', clustered_model_accuracy)
print('Clustered and quantized TFLite test_accuracy:', test_accuracy)

Baseline test accuracy: 0.7065
Clustered TF test accuracy: 0.6181
Clustered and quantized TFLite test_accuracy: 0.6117
