In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Download Tensorflow and Model Optimization packages

In [None]:
!pip uninstall -y tensorflow
!pip install -q tf-nightly-gpu
!pip install -q tensorflow-model-optimization==0.5
!pip install tensorflow==2.4.0

import tempfile
import os
import numpy as np

import tensorflow as tf

from tensorflow import keras

import tensorflow_model_optimization as tfmot

Uninstalling tensorflow-2.4.0:
  Successfully uninstalled tensorflow-2.4.0
[31mERROR: fancyimpute 0.4.3 requires tensorflow, which is not installed.[0m
Collecting tensorflow==2.4.0
  Using cached https://files.pythonhosted.org/packages/94/0a/012cc33c643d844433d13001dd1db179e7020b05ddbbd0a9dc86c38a8efa/tensorflow-2.4.0-cp37-cp37m-manylinux2010_x86_64.whl
Collecting gast==0.3.3
  Using cached https://files.pythonhosted.org/packages/d6/84/759f5dd23fec8ba71952d97bcc7e2c9d7d63bdc582421f3cd4be845f0c98/gast-0.3.3-py2.py3-none-any.whl
Collecting grpcio~=1.32.0
  Using cached https://files.pythonhosted.org/packages/06/54/1c8be62beafe7fb1548d2968e518ca040556b46b0275399d4f3186c56d79/grpcio-1.32.0-cp37-cp37m-manylinux2014_x86_64.whl
Collecting h5py~=2.10.0
  Using cached https://files.pythonhosted.org/packages/3f/c0/abde58b837e066bca19a3f7332d9d0493521d7dd6b48248451a9e3fe2214/h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl
[31mERROR: tf-nightly-gpu 2.5.0.dev20210318 has requirement gast==0.4.0, bu

## Download and split dataset

In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf

# Number of images per batch in the training and validation pipelines.
BATCH_SIZE = 32
# Side length, in pixels, that every image is resized to before batching.
IMAGE_SIZE = 224

def format_image(image, label):
    """Resize `image` to IMAGE_SIZE x IMAGE_SIZE and divide its values by 255.

    The label is passed through untouched so the function can be used
    directly with `Dataset.map` on (image, label) pairs.
    """
    resized = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    scaled = resized / 255.0
    return scaled, label

# Load cats_vs_dogs and slice its 'train' split 80/10/10 into
# train/validation/test subsets; as_supervised yields (image, label) pairs.
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'cats_vs_dogs',
    data_dir='/content/dataset/',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    with_info=True,
    shuffle_files=True,
    as_supervised=True,
)

# These counts come from the dataset metadata and describe the whole 'train'
# split, i.e. before the 80/10/10 slicing above.
num_examples = metadata.splits['train'].num_examples
num_classes = metadata.features['label'].num_classes
print("Numbers of images: ", num_examples)
print("Numbers of classes: ", num_classes)

# Let tf.data pick the parallelism / prefetch depth instead of hard-coding it.
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_batches = (
    raw_train
    .shuffle(num_examples // 4)
    .map(format_image, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)
validation_batches = (
    raw_validation
    .map(format_image, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)
# The test set is served one example per batch.
test_batches = raw_test.map(format_image, num_parallel_calls=AUTOTUNE).batch(1)

Numbers of images:  23262
Numbers of classes:  2


## Our model

In [None]:
model = tf.keras.models.Sequential([
    # Input: IMAGE_SIZE x IMAGE_SIZE images with 3 colour channels.
    # First convolution block
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # Second convolution block
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Third convolution block
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Fourth convolution block
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the feature maps to feed the dense classifier head
    tf.keras.layers.Flatten(),
    # Two hidden dense layers: 256 units, then 128 units
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    # Output layer: one softmax probability per class
    tf.keras.layers.Dense(num_classes, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
model.summary()

from tensorflow.keras.optimizers import RMSprop
# The last layer applies softmax, so the string loss (which uses the default
# from_logits=False) is the matching choice. `learning_rate` replaces the
# deprecated `lr` argument name.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=RMSprop(learning_rate=0.001),
              metrics=['acc'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 222, 222, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 109, 109, 32)      4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 52, 52, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        3

## Training our Keras model without quantization aware training

We train our Keras model for only one epoch.

In [None]:
# Baseline (non-quantization-aware) run: one epoch over train_batches,
# with validation_batches supplied as the validation data.
history = model.fit(train_batches, epochs=1, validation_data=validation_batches)



## Save the TensorFlow model

In [None]:
# Directory that receives the SavedModel export of the Keras model.
CATS_VS_DOGS_SAVED_MODEL = "./exp_saved_model"
tf.saved_model.save(obj=model, export_dir=CATS_VS_DOGS_SAVED_MODEL)

INFO:tensorflow:Assets written to: ./exp_saved_model/assets


INFO:tensorflow:Assets written to: ./exp_saved_model/assets


In [None]:
import subprocess

def du(path):
    """Return the first field of `du -sh <path>` as a str.

    That field is the human-readable disk usage (e.g. "19M"); the unit suffix
    is chosen by `du` itself.
    """
    raw_output = subprocess.check_output(['du', '-sh', path])
    size_field = raw_output.split()[0]
    return size_field.decode('utf-8')
# Show the SavedModel directory's disk usage; `du -sh` picks the unit suffix itself.
print("Model size in Mb:", du(CATS_VS_DOGS_SAVED_MODEL), sep="\n")

Model size in Mb:
19M


## Clone and fine-tune pre-trained model with quantization aware training

In [None]:
import tensorflow_model_optimization as tfmot

quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
# The model's final Dense layer already applies softmax, so it emits
# probabilities rather than logits: the loss must use from_logits=False.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

q_aware_model.summary()
# Fine-tune the quantization-aware clone for one epoch on the same data
# pipelines used for the baseline model.
q_aware_history = q_aware_model.fit(
      train_batches,
      epochs=1,
      validation_data=validation_batches)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
quantize_layer (QuantizeLaye (None, 224, 224, 3)       3         
_________________________________________________________________
quant_conv2d (QuantizeWrappe (None, 222, 222, 16)      483       
_________________________________________________________________
quant_max_pooling2d (Quantiz (None, 111, 111, 16)      1         
_________________________________________________________________
quant_conv2d_1 (QuantizeWrap (None, 109, 109, 32)      4707      
_________________________________________________________________
quant_max_pooling2d_1 (Quant (None, 54, 54, 32)        1         
_________________________________________________________________
quant_conv2d_2 (QuantizeWrap (None, 52, 52, 64)        18627     
_________________________________________________________________
quant_max_pooling2d_2 (Quant (None, 26, 26, 64)        1

## Evaluate the model against baseline

In [None]:
# evaluate() returns [loss, metric] for these models; index 1 is the accuracy
# metric each model was compiled with.
baseline_model_accuracy = model.evaluate(test_batches, verbose=0)[1]
q_aware_model_accuracy = q_aware_model.evaluate(test_batches, verbose=0)[1]

print('Baseline test accuracy:', baseline_model_accuracy)
print('Quant test accuracy:', q_aware_model_accuracy)


Baseline test accuracy: 0.649613082408905
Quant test accuracy: 0.8048151135444641


## Create quantized model for TFLite backend

Float (unquantized) TFLite model for the baseline Keras model

In [None]:
# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
# mkstemp returns an already-open OS-level file descriptor together with the
# path; write through that descriptor so it gets closed instead of leaked.
float_fd, float_file = tempfile.mkstemp('.tflite')
with os.fdopen(float_fd, 'wb') as f:
  f.write(float_tflite_model)

print("Float model in Mb:", os.path.getsize(float_file) / float(2**20))

INFO:tensorflow:Assets written to: /tmp/tmpfad0amis/assets


INFO:tensorflow:Assets written to: /tmp/tmpfad0amis/assets


Float model in Mb: 9.362892150878906


Quantized model for the QAT model

In [None]:
# Convert the quantization-aware model with the default optimizations enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

# mkstemp returns an already-open OS-level file descriptor together with the
# path; write through that descriptor so it gets closed instead of leaked.
quant_fd, quant_file = tempfile.mkstemp('.tflite')
with os.fdopen(quant_fd, 'wb') as f:
  f.write(quantized_tflite_model)

print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))




INFO:tensorflow:Assets written to: /tmp/tmpgix8upxz/assets


INFO:tensorflow:Assets written to: /tmp/tmpgix8upxz/assets


Quantized model in Mb: 2.351898193359375


## Note

What can we see in terms of the size and the accuracy of the models? We can try some modifications to assess the performance:

    By default: converter.optimizations = [tf.lite.Optimize.DEFAULT]
    Optimization for size: Hint: [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    Optimization for latency: Hint: [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]




## Clean Up

Before running the next exercise, run the following cell to terminate the kernel and free memory resources:

In [None]:
import os, signal
# Send SIGKILL to this very process (the notebook kernel) to free its memory.
kernel_pid = os.getpid()
os.kill(kernel_pid, signal.SIGKILL)