# EfficientNet Optimization

#### Model creation

In [2]:
!pip install -q tensorflow-model-optimization

In [3]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import tempfile
import zipfile
import os

In [4]:
from tensorflow.keras.applications import ResNet50, MobileNet, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, UpSampling2D, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications.resnet50 import preprocess_input
from keras.utils import np_utils

Using TensorFlow backend.


In [5]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)
y_train = np_utils.to_categorical(y_train, 100)
y_test = np_utils.to_categorical(y_test, 100)

#### Create functional model

In [7]:
def get_model():
    efficientNet_imagenet_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(32, 32, 3))

    #Flatten output layer of Resnet
    gfp = GlobalAveragePooling2D()(efficientNet_imagenet_model.output)

    #flattened = tf.keras.layers.Flatten()(resnet50_imagenet_model.output)

    #Fully connected layer 1
    fc1 = Dense(256, activation='relu', name="AddedDense1")(gfp)

    #Fully connected layer, output layer
    fc2 = Dense(100, activation='softmax', name="AddedDense2")(fc1)

    model = tf.keras.models.Model(inputs=efficientNet_imagenet_model.input, outputs=fc2)
    return model

In [8]:
model = get_model()

In [9]:
# sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

#### Model training

In [10]:
model.fit(x_train, y_train, batch_size=64, epochs=25, validation_data=(x_test, y_test))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7fdd0cca3a10>

# Quantization

#### Convert model to tflite

In [11]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: /tmp/tmpdf7_eh7p/assets


#### Dynamic range quantization
The simplest form of post-training quantization statically quantizes only the weights from floating point to integer, which has 8-bits of precision:

In [12]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_dynamic_quant = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmp64s7jpbq/assets


INFO:tensorflow:Assets written to: /tmp/tmp64s7jpbq/assets


#### Full integer quantization

In [13]:
def representative_data_gen():
    for input_value in tf.data.Dataset.from_tensor_slices(x_train).batch(1).take(100):
    # Model has only one input so each data point has one element.
        yield [input_value]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

tflite_model_full_integer_quant = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpuh1x2s1u/assets


INFO:tensorflow:Assets written to: /tmp/tmpuh1x2s1u/assets


#### Float 16 quant

In [14]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model_float16_quant = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmp0a47srxv/assets


INFO:tensorflow:Assets written to: /tmp/tmp0a47srxv/assets


In [15]:
import pathlib

tflite_models_dir = pathlib.Path("/tmp/cifar100_tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

# Save the unquantized/float model:
tflite_model_file = tflite_models_dir/"cifar100_model.tflite"
tflite_model_file.write_bytes(tflite_model)
# Save the quantized model:
tflite_model_dynamic_quant_file = tflite_models_dir/"cifar100_tflite_model_dynamic_quant.tflite"
tflite_model_dynamic_quant_file.write_bytes(tflite_model_dynamic_quant)
# Save the quantized model:
tflite_model_full_integer_quant_file = tflite_models_dir/"cifar100_tflite_model_full_integer_quant.tflite"
tflite_model_full_integer_quant_file.write_bytes(tflite_model_full_integer_quant)
# Save the quantized model:
tflite_model_float16_quant_file = tflite_models_dir/"cifar100_tflite_model_float16_quant.tflite"
tflite_model_float16_quant_file.write_bytes(tflite_model_float16_quant)

8803072

#### Check tflite model size

In [16]:
!ls /tmp/cifar100_tflite_models/ -lh

total 35M
-rw-r--r-- 1 jupyter jupyter  17M Apr 22 00:36 cifar100_model.tflite
-rw-r--r-- 1 jupyter jupyter 4.4M Apr 22 00:36 cifar100_tflite_model_dynamic_quant.tflite
-rw-r--r-- 1 jupyter jupyter 8.4M Apr 22 00:36 cifar100_tflite_model_float16_quant.tflite
-rw-r--r-- 1 jupyter jupyter 5.3M Apr 22 00:36 cifar100_tflite_model_full_integer_quant.tflite


In [17]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)

In [24]:
test_images = x_train
test_labels = y_train[:,0]
# test_labels = y_train[:1000]

y_test = y_test[:,0]

#### Quantized model evalutation

In [25]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, test_image_indices):
    global test_images

    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    predictions = np.zeros((len(test_image_indices),), dtype=int)
    for i, test_image_index in enumerate(test_image_indices):
    #print(i)
        test_image = x_test[test_image_index]
        test_label = y_test[test_image_index]

    # Check if the input type is quantized, then rescale input data to uint8
        if input_details['dtype'] == np.uint8:
            input_scale, input_zero_point = input_details["quantization"]
            test_image = test_image / input_scale + input_zero_point

        test_image = np.expand_dims(test_image, axis=0).astype(input_details["dtype"])
        interpreter.set_tensor(input_details["index"], test_image)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details["index"])[0]

        predictions[i] = output.argmax()

    return predictions

In [26]:
#Check change in accuracy

def evaluate_model(tflite_file, model_type):
    global x_test
    global y_test

    test_image_indices = range(x_test.shape[0])
    predictions = run_tflite_model(tflite_file, test_image_indices)
    #print(predictions)

    accuracy = (np.sum(y_test== predictions) * 100) / len(x_test)

    print('%s model accuracy is %.4f%% (Number of test samples=%d)' % (
      model_type, accuracy, len(x_test)))

In [27]:
import numpy as np
tflite_model = evaluate_model(tflite_model_file, model_type="float")

float model accuracy is 56.5500% (Number of test samples=10000)


In [28]:
tflite_dynamic_quant_accuracy = evaluate_model(tflite_model_dynamic_quant_file, model_type="Quantized")

Quantized model accuracy is 38.7400% (Number of test samples=10000)


In [29]:
tflite_full_integer_quant_accuracy = evaluate_model(tflite_model_full_integer_quant_file, model_type="Quantized")

Quantized model accuracy is 51.2900% (Number of test samples=10000)


In [30]:
tflite_float16_quant_accuracy = evaluate_model(tflite_model_float16_quant_file, model_type="Quantized")

Quantized model accuracy is 56.5800% (Number of test samples=10000)


In [31]:
!ls /tmp/cifar100_tflite_models/ -lh

total 35M
-rw-r--r-- 1 jupyter jupyter  17M Apr 22 00:36 cifar100_model.tflite
-rw-r--r-- 1 jupyter jupyter 4.4M Apr 22 00:36 cifar100_tflite_model_dynamic_quant.tflite
-rw-r--r-- 1 jupyter jupyter 8.4M Apr 22 00:36 cifar100_tflite_model_float16_quant.tflite
-rw-r--r-- 1 jupyter jupyter 5.3M Apr 22 00:36 cifar100_tflite_model_full_integer_quant.tflite


## Pruning

In [11]:
from tensorflow_model_optimization.python.core.sparsity.keras import prune
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule

In [19]:
def get_gzipped_model_size(file):
    # Returns size of gzipped model, in bytes.
    import os
    import zipfile

    _, zipped_file = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(file)

    return os.path.getsize(zipped_file)

In [12]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)
y_train = np_utils.to_categorical(y_train, 100)
y_test = np_utils.to_categorical(y_test, 100)

#### Check baseline model accuracy

In [13]:
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)

_, keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

Baseline test accuracy: 0.5666999816894531
Saved baseline model to: /tmp/tmpnbtuzij9.h5


In [35]:
!ls /tmp/tmpkw6weg2f.h5 -lh

-rw------- 1 jupyter jupyter 18M Apr 21 20:46 /tmp/tmpkw6weg2f.h5


In [60]:
def apply_pruning_to_dense(layer):
    if isinstance(layer, tf.keras.layers.Dense) or isinstance(layer, tf.keras.layers.Conv2D):
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

In [14]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 128
epochs = 2
validation_split = 0.1 # 10% of training set will be used for validation set. 

num_images = x_train.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

#### Pruning at 0.2 Sparsity 

In [88]:
def apply_pruning_to_dense(layer):
    if isinstance(layer, tf.keras.layers.Dense) or isinstance(layer, tf.keras.layers.Conv2D):
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

ps = pruning_schedule.PolynomialDecay(
                 initial_sparsity=0.20, final_sparsity=0.20,
                       begin_step=0, end_step=end_step, frequency=100)

model_for_pruning_2 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
model_for_pruning_2.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning_2.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
rescaling_1 (Rescaling)         (None, 32, 32, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 32, 32, 3)    7           rescaling_1[17][0]               
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 33, 33, 3)    0           normalization_1[17][0]           
_______________________________________________________________________________________

In [89]:
model_for_pruning_2.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f4a8be07610>

In [90]:
model_for_export_2 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_2)
_, pruned_keras_file_2 = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export_2, pruned_keras_file_2, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_2)

Saved pruned Keras model to: /tmp/tmpqkmvx0f_.h5


In [91]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_2)
pruned_tflite_model_2 = converter.convert()

_, pruned_tflite_file_2 = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file_2, 'wb') as f:
    f.write(pruned_tflite_model_2)

print('Saved pruned TFLite model to:', pruned_tflite_file_2)
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))

INFO:tensorflow:Assets written to: /tmp/tmp39zfticp/assets


INFO:tensorflow:Assets written to: /tmp/tmp39zfticp/assets


Saved pruned TFLite model to: /tmp/tmpkka9cajx.tflite
Size of gzipped pruned and quantized TFlite model: 14383280.00 bytes


In [92]:
_, model_for_pruning_2_accuracy = model_for_pruning_2.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)

Baseline test accuracy: 0.565500020980835
Pruned at 0.2 Sparsity test accuracy: 0.4530999958515167


#### Pruning at 0.4 Sparsity 

In [93]:
def apply_pruning_to_dense(layer):
    if isinstance(layer, tf.keras.layers.Dense) or isinstance(layer, tf.keras.layers.Conv2D):
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

ps = pruning_schedule.PolynomialDecay(
                 initial_sparsity=0.40, final_sparsity=0.40,
                       begin_step=0, end_step=end_step, frequency=100)

model_for_pruning_4 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
model_for_pruning_4.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning_4.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
rescaling_1 (Rescaling)         (None, 32, 32, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 32, 32, 3)    7           rescaling_1[19][0]               
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 33, 33, 3)    0           normalization_1[19][0]           
_______________________________________________________________________________________

In [94]:
model_for_pruning_4.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f4a8600b350>

In [95]:
model_for_export_4 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_4)
_, pruned_keras_file_4 = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export_4, pruned_keras_file_4, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_4)

Saved pruned Keras model to: /tmp/tmpd3ezdy20.h5


In [96]:
_, model_for_pruning_4_accuracy = model_for_pruning_4.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)
print('Pruned at 0.4 Sparsity test accuracy:', model_for_pruning_4_accuracy)

Baseline test accuracy: 0.565500020980835
Pruned at 0.2 Sparsity test accuracy: 0.4530999958515167
Pruned at 0.4 Sparsity test accuracy: 0.48170000314712524


In [101]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_4)
pruned_tflite_model_4 = converter.convert()

_, pruned_tflite_file_4 = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file_4, 'wb') as f:
    f.write(pruned_tflite_model_4)

print('Saved pruned TFLite model to:', pruned_tflite_file_4)
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))

INFO:tensorflow:Assets written to: /tmp/tmpdevbci8p/assets


INFO:tensorflow:Assets written to: /tmp/tmpdevbci8p/assets


Saved pruned TFLite model to: /tmp/tmpq3u90g6j.tflite
Size of gzipped pruned and quantized TFlite model: 14383280.00 bytes
Size of gzipped pruned and quantized TFlite model: 8997139.00 bytes


#### Pruning at 0.6 Sparsity 

In [15]:
def apply_pruning_to_dense(layer):
    if isinstance(layer, tf.keras.layers.Dense) or isinstance(layer, tf.keras.layers.Conv2D):
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

ps = pruning_schedule.PolynomialDecay(
                 initial_sparsity=0.60, final_sparsity=0.60,
                       begin_step=0, end_step=end_step, frequency=100)

model_for_pruning_6 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
model_for_pruning_6.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning_6.summary()

Instructions for updating:
Please use `layer.add_weight` method instead.
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
rescaling_1 (Rescaling)         (None, 32, 32, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 32, 32, 3)    7           rescaling_1[1][0]                
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 33, 33, 3)    0           normalization_1[1][0]            
______________

In [16]:
model_for_pruning_6.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7fd9203f9a90>

In [17]:
model_for_export_6 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_6)
_, pruned_keras_file_6 = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export_6, pruned_keras_file_6, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_6)

Saved pruned Keras model to: /tmp/tmpxl7jn1rh.h5


In [22]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_6)
pruned_tflite_model_6 = converter.convert()

_, pruned_tflite_file_6 = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file_6, 'wb') as f:
    f.write(pruned_tflite_model_6)

print('Saved pruned TFLite model to:', pruned_tflite_file_6)
# print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
# print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_6)))

INFO:tensorflow:Assets written to: /tmp/tmpkrt3_6pw/assets


INFO:tensorflow:Assets written to: /tmp/tmpkrt3_6pw/assets


Saved pruned TFLite model to: /tmp/tmp2kb785iq.tflite
Size of gzipped pruned and quantized TFlite model: 8907159.00 bytes


In [23]:
_, model_for_pruning_6_accuracy = model_for_pruning_6.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
# print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)
# print('Pruned at 0.4 Sparsity test accuracy:', model_for_pruning_4_accuracy)
print('Pruned at 0.6 Sparsity test accuracy:', model_for_pruning_6_accuracy)

Baseline test accuracy: 0.5666999816894531
Pruned at 0.6 Sparsity test accuracy: 0.5544999837875366


In [53]:
!ls /tmp/tmppanw888p.h5 -lh

-rw------- 1 jupyter jupyter 18M Apr 21 21:20 /tmp/tmppanw888p.h5


#### Pruning at 0.8 Sparsity 

In [24]:
def apply_pruning_to_dense(layer):
    if isinstance(layer, tf.keras.layers.Dense) or isinstance(layer, tf.keras.layers.Conv2D):
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

ps = pruning_schedule.PolynomialDecay(
                 initial_sparsity=0.80, final_sparsity=0.80,
                       begin_step=0, end_step=end_step, frequency=100)

model_for_pruning_8 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
model_for_pruning_8.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning_8.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
rescaling_1 (Rescaling)         (None, 32, 32, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 32, 32, 3)    7           rescaling_1[3][0]                
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 33, 33, 3)    0           normalization_1[3][0]            
_______________________________________________________________________________________

In [25]:
model_for_pruning_8.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7fd8f4e4f5d0>

In [26]:
_, model_for_pruning_8_accuracy = model_for_pruning_8.evaluate(
   x_test, y_test, verbose=0)

# print('Baseline test accuracy:', baseline_model_accuracy) 
# print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)
# print('Pruned at 0.4 Sparsity test accuracy:', model_for_pruning_4_accuracy)
print('Pruned at 0.6 Sparsity test accuracy:', model_for_pruning_6_accuracy)
print('Pruned at 0.8 Sparsity test accuracy:', model_for_pruning_8_accuracy)

Pruned at 0.6 Sparsity test accuracy: 0.5544999837875366
Pruned at 0.8 Sparsity test accuracy: 0.5059000253677368


In [27]:
model_for_export_8 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_8)
_, pruned_keras_file_8 = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export_8, pruned_keras_file_8, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_8)

Saved pruned Keras model to: /tmp/tmpi82jouta.h5


In [57]:
!ls /tmp/tmpoy2b62fo.h5 -lh

-rw------- 1 jupyter jupyter 18M Apr 21 21:20 /tmp/tmpoy2b62fo.h5


#### Convert models to tflite

In [28]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_8)
pruned_tflite_model_8 = converter.convert()

_, pruned_tflite_file_8 = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file_8, 'wb') as f:
    f.write(pruned_tflite_model_8)

print('Saved pruned TFLite model to:', pruned_tflite_file_8)
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_6)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_8)))

INFO:tensorflow:Assets written to: /tmp/tmpd4w7cdsj/assets


INFO:tensorflow:Assets written to: /tmp/tmpd4w7cdsj/assets


Saved pruned TFLite model to: /tmp/tmpszu1bvlw.tflite
Size of gzipped pruned and quantized TFlite model: 8907159.00 bytes
Size of gzipped pruned and quantized TFlite model: 5756089.00 bytes


### Pruning and Quantization

In [None]:
def get_gzipped_model_size(file):
    # Returns size of gzipped model, in bytes.
    import os
    import zipfile

    _, zipped_file = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(file)

    return os.path.getsize(zipped_file)

In [29]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_6)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)

print('Saved quantized and pruned TFLite model to:', quantized_and_pruned_tflite_file)

print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

INFO:tensorflow:Assets written to: /tmp/tmpv7b2t0j4/assets


INFO:tensorflow:Assets written to: /tmp/tmpv7b2t0j4/assets


Saved quantized and pruned TFLite model to: /tmp/tmpw67za2sw.tflite
Size of gzipped baseline Keras model: 16421810.00 bytes
Size of gzipped pruned and quantized TFlite model: 1638074.00 bytes


In [30]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_6)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_8)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

Size of gzipped baseline Keras model: 16421810.00 bytes
Size of gzipped pruned TFlite model: 8907159.00 bytes
Size of gzipped pruned TFlite model: 5756089.00 bytes
Size of gzipped pruned and quantized TFlite model: 1638074.00 bytes


In [35]:
y_test = np.argmax(y_test, axis=1)

In [34]:
import numpy as np

def evaluate_model(interpreter):
#     global prediction_digits
    prediction_digits = []
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on ever y image in the "test" dataset.
    for i, test_image in enumerate(x_test):
        if i % 1000 == 0:
            print('Evaluated on {n} results so far.'.format(n=i))
        # Pre-processing: add batch dimension and convert to float32 to match with
        # the model's input data format.
        test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, test_image)

        # Run inference.
        interpreter.invoke()

        # Post-processing: remove batch dimension and find the digit with highest
        # probability.
        output = interpreter.tensor(output_index)
        digit = np.argmax(output()[0])
        prediction_digits.append(digit)

    print('\n')
    # Compare prediction results with ground truth labels to calculate accuracy.
    prediction_digits = np.array(prediction_digits)
    accuracy = (prediction_digits == y_test).mean()
    return accuracy

In [36]:
interpreter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter)

print('Pruned and quantized TFLite test_accuracy:', test_accuracy)

Evaluated on 0 results so far.
Evaluated on 1000 results so far.
Evaluated on 2000 results so far.
Evaluated on 3000 results so far.
Evaluated on 4000 results so far.
Evaluated on 5000 results so far.
Evaluated on 6000 results so far.
Evaluated on 7000 results so far.
Evaluated on 8000 results so far.
Evaluated on 9000 results so far.


Pruned and quantized TFLite test_accuracy: 0.4219
