# EfficientNet Optimization

#### Model creation

In [None]:
!pip install -q tensorflow-model-optimization

In [None]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import tempfile
import zipfile
import os

In [None]:
from tensorflow.keras.applications import ResNet50, MobileNet, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, UpSampling2D, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications.resnet50 import preprocess_input
from keras.utils import np_utils

In [None]:
# Load CIFAR-100 with the fine-grained labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')
# NOTE(review): `preprocess_input` is imported from the ResNet50 application
# module, while the model in this file is built on EfficientNetB0 -- confirm
# this preprocessing is the intended one.
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)
# One-hot encode the labels into 100 columns.
y_train = np_utils.to_categorical(y_train, 100)
y_test = np_utils.to_categorical(y_test, 100)

#### Create functional model

In [None]:
def get_model(num_classes=100, input_shape=(32, 32, 3), hidden_units=256):
    """Build an EfficientNetB0 backbone with a small dense classification head.

    Args:
        num_classes: Number of softmax outputs. Defaults to 100.
        input_shape: Shape of a single input image. Defaults to ``(32, 32, 3)``.
        hidden_units: Width of the intermediate fully connected layer.
            Defaults to 256.

    Returns:
        A ``tf.keras.models.Model`` that maps the backbone input to the
        softmax output of the added head.
    """
    backbone = EfficientNetB0(include_top=False, weights='imagenet', input_shape=input_shape)

    # Average the EfficientNet feature map over its spatial dimensions.
    pooled = GlobalAveragePooling2D()(backbone.output)

    # Fully connected layer 1
    fc1 = Dense(hidden_units, activation='relu', name="AddedDense1")(pooled)

    # Fully connected output layer (class probabilities)
    fc2 = Dense(num_classes, activation='softmax', name="AddedDense2")(fc1)

    return tf.keras.models.Model(inputs=backbone.input, outputs=fc2)

In [None]:
# Instantiate the EfficientNetB0-based classifier defined by get_model().
model = get_model()

In [None]:
# Labels are one-hot encoded, hence categorical cross-entropy; accuracy is tracked as the metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

#### Model training

In [None]:
# Train for 25 epochs with batches of 64.
# NOTE(review): the test split is also used as validation data here -- confirm this is acceptable.
model.fit(x_train, y_train, batch_size=64, epochs=25, validation_data=(x_test, y_test))

# Quantization

#### Convert model to tflite

In [None]:
# Baseline conversion: no optimization flags are set on the converter.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

#### Dynamic range quantization

In [None]:
# Conversion with the default optimization flag and no representative dataset.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_dynamic_quant = converter.convert()

#### Full integer quantization

In [None]:
def representative_data_gen():
    """Yield calibration samples for the TFLite converter.

    Takes the first 100 batches of size one from the global ``x_train`` and
    yields each one wrapped in a list, because the model has a single input.
    """
    calibration_set = tf.data.Dataset.from_tensor_slices(x_train).batch(1).take(100)
    for sample in calibration_set:
        yield [sample]

# Conversion with default optimizations plus a representative dataset for calibration.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# The generator supplies the calibration samples to the converter.
converter.representative_dataset = representative_data_gen
# NOTE(review): neither `target_spec.supported_ops` nor the inference input/output
# types are set here, so parts of the model may presumably stay in float --
# confirm this matches what "full integer" is meant to be.

tflite_model_full_integer_quant = converter.convert()

#### Float16 quantization

In [None]:
# Conversion with default optimizations and float16 listed as the supported type.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model_float16_quant = converter.convert()

In [None]:
import pathlib

# Directory that collects every exported TFLite flatbuffer.
tflite_models_dir = pathlib.Path("/tmp/cifar100_tflite_models/")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# Unquantized float model.
tflite_model_file = tflite_models_dir.joinpath("cifar100_model.tflite")
tflite_model_file.write_bytes(tflite_model)

# Dynamic range quantized model.
tflite_model_dynamic_quant_file = tflite_models_dir.joinpath(
    "cifar100_tflite_model_dynamic_quant.tflite")
tflite_model_dynamic_quant_file.write_bytes(tflite_model_dynamic_quant)

# Model converted with the representative dataset.
tflite_model_full_integer_quant_file = tflite_models_dir.joinpath(
    "cifar100_tflite_model_full_integer_quant.tflite")
tflite_model_full_integer_quant_file.write_bytes(tflite_model_full_integer_quant)

# Float16 quantized model.
tflite_model_float16_quant_file = tflite_models_dir.joinpath(
    "cifar100_tflite_model_float16_quant.tflite")
tflite_model_float16_quant_file.write_bytes(tflite_model_float16_quant)

#### Check tflite model size

In [None]:
!ls /tmp/cifar100_tflite_models/ -lh

In [None]:
# Reload the dataset; the labels are not one-hot encoded in this cell.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')
# Same preprocessing call as used before training.
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)

In [None]:
# Collapse the label array to 1-D. The guard keeps the cell re-runnable:
# indexing `[:, 0]` a second time on an already 1-D array would raise.
if y_test.ndim > 1:
    y_test = y_test[:, 0]

# The evaluation aliases must refer to the held-out test split, not to the
# training split.
test_images = x_test
test_labels = y_test

#### Quantized model evaluation

In [None]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, test_image_indices):
    """Run a TFLite model on selected ``x_test`` images and return the predictions.

    Args:
        tflite_file: Path of the ``.tflite`` file to load.
        test_image_indices: Sized iterable of indices into the global ``x_test``.

    Returns:
        1-D integer numpy array with the arg-max class of every requested
        image, in the order the indices were given.
    """
    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    input_dtype = input_details["dtype"]

    # Models with an integer input (uint8 or int8) need the float image mapped
    # into the quantized domain. The parameters do not change between images,
    # so they are resolved once, outside the loop.
    needs_quantized_input = input_dtype in (np.uint8, np.int8)
    if needs_quantized_input:
        input_scale, input_zero_point = input_details["quantization"]

    predictions = np.zeros((len(test_image_indices),), dtype=int)
    for i, test_image_index in enumerate(test_image_indices):
        test_image = x_test[test_image_index]

        if needs_quantized_input:
            test_image = test_image / input_scale + input_zero_point

        # Add the batch dimension and cast to the dtype the interpreter reports.
        test_image = np.expand_dims(test_image, axis=0).astype(input_dtype)
        interpreter.set_tensor(input_details["index"], test_image)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details["index"])[0]

        predictions[i] = output.argmax()

    return predictions

In [None]:
#Check change in accuracy

def evaluate_model(tflite_file, model_type):
    """Print and return the accuracy of a TFLite model on the global test set.

    Args:
        tflite_file: Path of the ``.tflite`` file to evaluate.
        model_type: Label used in the printed report.

    Returns:
        Accuracy in percent, computed over every image in ``x_test`` against
        the labels in ``y_test``.
    """
    test_image_indices = range(x_test.shape[0])
    predictions = run_tflite_model(tflite_file, test_image_indices)

    accuracy = (np.sum(y_test == predictions) * 100) / len(x_test)

    print('%s model accuracy is %.4f%% (Number of test samples=%d)' % (
      model_type, accuracy, len(x_test)))

    # Callers assign the result, so hand the value back instead of None.
    return accuracy

In [None]:
import numpy as np
# Bind the result to its own name so the serialized `tflite_model` bytes are
# not overwritten by the evaluation result.
tflite_model_accuracy = evaluate_model(tflite_model_file, model_type="float")

In [None]:
# Use a label that identifies the quantization scheme in the printed report.
tflite_dynamic_quant_accuracy = evaluate_model(tflite_model_dynamic_quant_file, model_type="Dynamic range quantized")

In [None]:
# Use a label that identifies the quantization scheme in the printed report.
tflite_full_integer_quant_accuracy = evaluate_model(tflite_model_full_integer_quant_file, model_type="Full integer quantized")

In [None]:
# Use a label that identifies the quantization scheme in the printed report.
tflite_float16_quant_accuracy = evaluate_model(tflite_model_float16_quant_file, model_type="Float16 quantized")

In [None]:
!ls /tmp/cifar100_tflite_models/ -lh

## Pruning

In [None]:
from tensorflow_model_optimization.python.core.sparsity.keras import prune
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule

In [None]:
def get_gzipped_model_size(file):
    """Return the size in bytes of ``file`` once stored in a DEFLATE zip archive.

    Args:
        file: Path of the file to compress.

    Returns:
        Size of the resulting archive in bytes. The archive is written to a
        temporary file that is removed before returning.
    """
    import os
    import tempfile
    import zipfile

    fd, zipped_file = tempfile.mkstemp('.zip')
    # mkstemp returns an already-open descriptor; close it so it is not leaked.
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file)
        return os.path.getsize(zipped_file)
    finally:
        # The archive is only needed for the measurement.
        os.remove(zipped_file)

In [None]:
# Reload the dataset and one-hot encode the labels again for Keras training and evaluation.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')
# Same preprocessing call as used before the original training.
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)
# One-hot encode the labels into 100 columns.
y_train = np_utils.to_categorical(y_train, 100)
y_test = np_utils.to_categorical(y_test, 100)

#### Check baseline model accuracy

In [None]:
# Accuracy of the unpruned model, kept as the reference for later comparisons.
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)

# mkstemp returns an open descriptor together with the path; close it right
# away so it is not leaked while save_model writes to the path.
keras_fd, keras_file = tempfile.mkstemp('.h5')
os.close(keras_fd)
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

In [None]:
def apply_pruning_to_dense(layer):
    """Wrap Dense and Conv2D layers for magnitude pruning; return others unchanged.

    The pruning schedule is read from the module-level ``ps`` at call time.
    """
    prunable_types = (tf.keras.layers.Dense, tf.keras.layers.Conv2D)
    if not isinstance(layer, prunable_types):
        return layer
    return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)

In [None]:
import tensorflow_model_optimization as tfmot

# Short alias for the pruning wrapper.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
# NOTE(review): the pruning `fit` calls in this file pass batch_size=64,
# epochs=6 and explicit validation data, so `end_step` computed from the
# values below does not correspond to those runs -- confirm which is intended.
batch_size = 128
epochs = 2
validation_split = 0.1 # Fraction subtracted from the training set size below.

# Number of images left after removing the validation fraction.
num_images = x_train.shape[0] * (1 - validation_split)
# Steps per epoch (rounded up) times the number of epochs.
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

#### Pruning at 0.2 Sparsity 

In [None]:
def apply_pruning_to_dense(layer):
    """Clone function that adds pruning wrappers to Dense and Conv2D layers.

    Any other layer is returned as-is. The schedule comes from the
    module-level ``ps``, looked up when the function is called.
    """
    is_prunable = isinstance(layer, (tf.keras.layers.Dense, tf.keras.layers.Conv2D))
    if is_prunable:
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

# Schedule with identical initial and final sparsity (20%).
ps = pruning_schedule.PolynomialDecay(
    initial_sparsity=0.20, final_sparsity=0.20,
    begin_step=0, end_step=end_step, frequency=100)

# Rebuild the model, passing every layer through apply_pruning_to_dense.
model_for_pruning_2 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
# The network ends in a softmax layer, so the loss receives probabilities
# rather than logits: `from_logits` must be False.
model_for_pruning_2.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

model_for_pruning_2.summary()

In [None]:
# Fine-tune the pruning-wrapped model for 6 epochs; the UpdatePruningStep callback is passed to the training loop.
model_for_pruning_2.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

In [None]:
# Strip the pruning wrappers before exporting.
model_for_export_2 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_2)
# mkstemp returns an open descriptor together with the path; close it so it
# is not leaked.
pruned_keras_fd_2, pruned_keras_file_2 = tempfile.mkstemp('.h5')
os.close(pruned_keras_fd_2)
tf.keras.models.save_model(model_for_export_2, pruned_keras_file_2, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_2)

In [None]:
# Convert the stripped 0.2-sparsity model to TFLite (no quantization flags set).
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_2)
pruned_tflite_model_2 = converter.convert()

pruned_tflite_fd_2, pruned_tflite_file_2 = tempfile.mkstemp('.tflite')

# Write through the descriptor mkstemp already opened instead of leaking it
# and opening the path a second time.
with os.fdopen(pruned_tflite_fd_2, 'wb') as f:
    f.write(pruned_tflite_model_2)

print('Saved pruned TFLite model to:', pruned_tflite_file_2)
# This model is pruned only, so the label must not claim quantization.
print("Size of gzipped pruned (0.2 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))

In [None]:
# Evaluate the pruning-wrapped model on the test split and compare with the baseline.
_, model_for_pruning_2_accuracy = model_for_pruning_2.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)

#### Pruning at 0.4 Sparsity 

In [None]:
def apply_pruning_to_dense(layer):
    """Return ``layer`` wrapped for pruning when it is a Dense or Conv2D layer.

    Layers of any other type are handed back untouched. The module-level
    schedule ``ps`` is resolved at call time.
    """
    prunable_types = (tf.keras.layers.Dense, tf.keras.layers.Conv2D)
    if not isinstance(layer, prunable_types):
        return layer
    return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)

# Schedule with identical initial and final sparsity (40%).
ps = pruning_schedule.PolynomialDecay(
    initial_sparsity=0.40, final_sparsity=0.40,
    begin_step=0, end_step=end_step, frequency=100)

# Rebuild the model, passing every layer through apply_pruning_to_dense.
model_for_pruning_4 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
# The network ends in a softmax layer, so the loss receives probabilities
# rather than logits: `from_logits` must be False.
model_for_pruning_4.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

model_for_pruning_4.summary()

In [None]:
# Fine-tune the pruning-wrapped model for 6 epochs; the UpdatePruningStep callback is passed to the training loop.
model_for_pruning_4.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

In [None]:
# Strip the pruning wrappers before exporting.
model_for_export_4 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_4)
# mkstemp returns an open descriptor together with the path; close it so it
# is not leaked.
pruned_keras_fd_4, pruned_keras_file_4 = tempfile.mkstemp('.h5')
os.close(pruned_keras_fd_4)
tf.keras.models.save_model(model_for_export_4, pruned_keras_file_4, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_4)

In [None]:
# Evaluate the pruning-wrapped model on the test split and list it next to the earlier results.
_, model_for_pruning_4_accuracy = model_for_pruning_4.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)
print('Pruned at 0.4 Sparsity test accuracy:', model_for_pruning_4_accuracy)

In [None]:
# Convert the stripped 0.4-sparsity model to TFLite (no quantization flags set).
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_4)
pruned_tflite_model_4 = converter.convert()

pruned_tflite_fd_4, pruned_tflite_file_4 = tempfile.mkstemp('.tflite')

# Write through the descriptor mkstemp already opened instead of leaking it
# and opening the path a second time.
with os.fdopen(pruned_tflite_fd_4, 'wb') as f:
    f.write(pruned_tflite_model_4)

print('Saved pruned TFLite model to:', pruned_tflite_file_4)
# These models are pruned only; each label names the sparsity so the lines
# can be told apart.
print("Size of gzipped pruned (0.2 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned (0.4 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))

#### Pruning at 0.6 Sparsity 

In [None]:
def apply_pruning_to_dense(layer):
    """Clone function: prune Dense and Conv2D layers, leave the rest alone.

    Reads the pruning schedule from the module-level ``ps`` when called.
    """
    is_prunable = isinstance(layer, (tf.keras.layers.Dense, tf.keras.layers.Conv2D))
    if is_prunable:
        return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)
    return layer

# Schedule with identical initial and final sparsity (60%).
ps = pruning_schedule.PolynomialDecay(
    initial_sparsity=0.60, final_sparsity=0.60,
    begin_step=0, end_step=end_step, frequency=100)

# Rebuild the model, passing every layer through apply_pruning_to_dense.
model_for_pruning_6 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
# The network ends in a softmax layer, so the loss receives probabilities
# rather than logits: `from_logits` must be False.
model_for_pruning_6.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

model_for_pruning_6.summary()

In [None]:
# Fine-tune the pruning-wrapped model for 6 epochs; the UpdatePruningStep callback is passed to the training loop.
model_for_pruning_6.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

In [None]:
# Strip the pruning wrappers before exporting.
model_for_export_6 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_6)
# mkstemp returns an open descriptor together with the path; close it so it
# is not leaked.
pruned_keras_fd_6, pruned_keras_file_6 = tempfile.mkstemp('.h5')
os.close(pruned_keras_fd_6)
tf.keras.models.save_model(model_for_export_6, pruned_keras_file_6, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_6)

In [None]:
# Convert the stripped 0.6-sparsity model to TFLite (no quantization flags set).
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_6)
pruned_tflite_model_6 = converter.convert()

pruned_tflite_fd_6, pruned_tflite_file_6 = tempfile.mkstemp('.tflite')

# Write through the descriptor mkstemp already opened instead of leaking it
# and opening the path a second time.
with os.fdopen(pruned_tflite_fd_6, 'wb') as f:
    f.write(pruned_tflite_model_6)

print('Saved pruned TFLite model to:', pruned_tflite_file_6)
# These models are pruned only; each label names the sparsity so the lines
# can be told apart.
print("Size of gzipped pruned (0.2 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned (0.4 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))
print("Size of gzipped pruned (0.6 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_6)))

In [None]:
# Evaluate the pruning-wrapped model on the test split and list it next to the earlier results.
_, model_for_pruning_6_accuracy = model_for_pruning_6.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)
print('Pruned at 0.4 Sparsity test accuracy:', model_for_pruning_4_accuracy)
print('Pruned at 0.6 Sparsity test accuracy:', model_for_pruning_6_accuracy)

#### Pruning at 0.8 Sparsity 

In [None]:
def apply_pruning_to_dense(layer):
    """Wrap ``layer`` for magnitude pruning if it is a Dense or Conv2D layer.

    Other layers are returned unchanged. The schedule is taken from the
    module-level ``ps`` at the time of the call.
    """
    prunable_types = (tf.keras.layers.Dense, tf.keras.layers.Conv2D)
    if not isinstance(layer, prunable_types):
        return layer
    return tfmot.sparsity.keras.prune_low_magnitude(layer, ps)

# Schedule with identical initial and final sparsity (80%).
ps = pruning_schedule.PolynomialDecay(
    initial_sparsity=0.80, final_sparsity=0.80,
    begin_step=0, end_step=end_step, frequency=100)

# Rebuild the model, passing every layer through apply_pruning_to_dense.
model_for_pruning_8 = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)

# `prune_low_magnitude` requires a recompile.
# The network ends in a softmax layer, so the loss receives probabilities
# rather than logits: `from_logits` must be False.
model_for_pruning_8.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

model_for_pruning_8.summary()

In [None]:
# Fine-tune the pruning-wrapped model for 6 epochs; the UpdatePruningStep callback is passed to the training loop.
model_for_pruning_8.fit(x_train, y_train, batch_size=64, epochs=6, validation_data=(x_test, y_test), callbacks = [pruning_callbacks.UpdatePruningStep()])

In [None]:
# Evaluate the pruning-wrapped model on the test split and list it next to the earlier results.
_, model_for_pruning_8_accuracy = model_for_pruning_8.evaluate(
   x_test, y_test, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned at 0.2 Sparsity test accuracy:', model_for_pruning_2_accuracy)
print('Pruned at 0.4 Sparsity test accuracy:', model_for_pruning_4_accuracy)
print('Pruned at 0.6 Sparsity test accuracy:', model_for_pruning_6_accuracy)
print('Pruned at 0.8 Sparsity test accuracy:', model_for_pruning_8_accuracy)

In [None]:
# Strip the pruning wrappers before exporting.
model_for_export_8 = tfmot.sparsity.keras.strip_pruning(model_for_pruning_8)
# mkstemp returns an open descriptor together with the path; close it so it
# is not leaked.
pruned_keras_fd_8, pruned_keras_file_8 = tempfile.mkstemp('.h5')
os.close(pruned_keras_fd_8)
tf.keras.models.save_model(model_for_export_8, pruned_keras_file_8, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file_8)

In [None]:
# Convert the stripped 0.8-sparsity model to TFLite (no quantization flags set).
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_8)
pruned_tflite_model_8 = converter.convert()

pruned_tflite_fd_8, pruned_tflite_file_8 = tempfile.mkstemp('.tflite')

# Write through the descriptor mkstemp already opened instead of leaking it
# and opening the path a second time.
with os.fdopen(pruned_tflite_fd_8, 'wb') as f:
    f.write(pruned_tflite_model_8)

print('Saved pruned TFLite model to:', pruned_tflite_file_8)
# These models are pruned only; each label names the sparsity so the lines
# can be told apart.
print("Size of gzipped pruned (0.2 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned (0.4 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))
print("Size of gzipped pruned (0.6 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_6)))
print("Size of gzipped pruned (0.8 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_8)))

### Pruning and Quantization

In [None]:
def get_gzipped_model_size(file):
    """Measure how large ``file`` is after DEFLATE compression in a zip archive.

    Args:
        file: Path of the file to compress.

    Returns:
        Size in bytes of the temporary archive, which is deleted before the
        function returns.
    """
    import os
    import tempfile
    import zipfile

    # mkstemp opens the file for us; release that descriptor immediately,
    # because ZipFile opens the path on its own.
    fd, zipped_file = tempfile.mkstemp('.zip')
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file)
        return os.path.getsize(zipped_file)
    finally:
        # Do not leave one stray archive behind per measurement.
        os.remove(zipped_file)

In [None]:
# Convert the stripped 0.6-sparsity model with the default optimization flag set.
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export_6)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

quantized_and_pruned_tflite_fd, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

# Write through the descriptor mkstemp already opened instead of leaking it
# and opening the path a second time.
with os.fdopen(quantized_and_pruned_tflite_fd, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)

print('Saved quantized and pruned TFLite model to:', quantized_and_pruned_tflite_file)

print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

In [None]:
# Size summary. Each pruned model is labelled with its sparsity so the four
# lines can be told apart.
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned (0.2 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_2)))
print("Size of gzipped pruned (0.4 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_4)))
print("Size of gzipped pruned (0.6 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_6)))
print("Size of gzipped pruned (0.8 sparsity) TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file_8)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

In [None]:
# Turn one-hot labels into class indices. The guard keeps the cell re-runnable:
# calling argmax with axis=1 on labels that are already 1-D would raise.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)

In [None]:
import numpy as np

def evaluate_model(interpreter):
    """Return the fraction of ``x_test`` images the interpreter classifies correctly.

    Every image of the global ``x_test`` is fed through ``interpreter`` one at
    a time, and the arg-max of the first output row is compared with the
    global ``y_test``. A progress line is printed every 1000 images.
    """
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    predicted_classes = []
    for sample_number, image in enumerate(x_test):
        if sample_number % 1000 == 0:
            print('Evaluated on {n} results so far.'.format(n=sample_number))

        # Add the batch dimension and cast to float32 before feeding the interpreter.
        batch = np.expand_dims(image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, batch)

        interpreter.invoke()

        # Drop the batch dimension and keep the index of the largest output.
        read_output = interpreter.tensor(output_index)
        predicted_classes.append(np.argmax(read_output()[0]))

    print('\n')
    # Share of predictions that equal the ground-truth labels.
    return (np.array(predicted_classes) == y_test).mean()

In [None]:
# Load the pruned and quantized model directly from its in-memory bytes.
interpreter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)
interpreter.allocate_tensors()

# Accuracy over the whole test split, returned as a fraction.
test_accuracy = evaluate_model(interpreter)

print('Pruned and quantized TFLite test_accuracy:', test_accuracy)