In [2]:
!pip uninstall -y tensorflow
!pip install -q tf-nightly
!pip install -q tensorflow-model-optimization

Uninstalling tensorflow-2.4.1:
  Successfully uninstalled tensorflow-2.4.1
[K     |████████████████████████████████| 452.9MB 38kB/s 
[K     |████████████████████████████████| 5.9MB 23.7MB/s 
[K     |████████████████████████████████| 4.0MB 45.3MB/s 
[K     |████████████████████████████████| 4.2MB 45.7MB/s 
[K     |████████████████████████████████| 471kB 53.0MB/s 
[K     |████████████████████████████████| 1.3MB 25.1MB/s 
[K     |████████████████████████████████| 3.9MB 52.1MB/s 
[31mERROR: fancyimpute 0.4.3 requires tensorflow, which is not installed.[0m
[K     |████████████████████████████████| 174kB 18.9MB/s 
[?25h

In [1]:
import tensorflow as tf
import numpy as np
import tensorflow_model_optimization as tfmot
from keras.datasets import cifar10

### Loading CIFAR-10 dataset

In [2]:
(trainX, trainy), (testX, testy) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
# print to make sure we have the correct shapes + number of images for training
print("number of train pictures:", trainX.shape)
print("number of trained picture values:", trainy.shape)
# divide by 255 to make [0,255] into [0,1] + print to make sure!
trainy = tf.keras.utils.to_categorical(trainy,10)
testy = tf.keras.utils.to_categorical(testy,10)
trainX = trainX/255.0
testX = testX/255.0

number of train pictures: (50000, 32, 32, 3)
number of trained picture values: (50000, 1)


### vgg16 without quantization

In [27]:
# Define the model architecture.
model_vgg16 = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=(32, 32, 3)),
  tf.keras.layers.Reshape(target_shape=(32, 32, 3)),
  #block-1
  tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-2
  tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-3
  tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-4
  tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-5
  tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(4096),
  tf.keras.layers.Dense(4096),
  tf.keras.layers.Dense(10)
])
model_vgg16.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_11 (Reshape)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_130 (Conv2D)          (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_131 (Conv2D)          (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_50 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_132 (Conv2D)          (None, 16, 16, 128)       73856     
_________________________________________________________________
conv2d_133 (Conv2D)          (None, 16, 16, 128)       147584    
_________________________________________________________________
max_pooling2d_51 (MaxPooling (None, 8, 8, 128)       

### Training on vanilla vgg-16

In [65]:
# from keras.optimizers import Adam
# from keras.optimizers import SGD

# Compile the model
model_vgg16.compile(optimizer='adam', loss=tf.keras.losses.categorical_crossentropy,metrics=['accuracy'])

# Fit data to model
model_vgg16.fit(trainX[:200], trainy[:200],
          batch_size=50,
          epochs=1,
          validation_split=0.1,
          )



<tensorflow.python.keras.callbacks.History at 0x7fd80729c850>

### Defining the quantization functions

In [100]:
LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

num_bits_wights = 16
num_bits_activation = 16

class LayerQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
      return [(layer.kernel, LastValueQuantizer(num_bits_wights, symmetric=True, narrow_range=False, per_axis=False))]

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
      return [(layer.activation, MovingAverageQuantizer(num_bits_activation, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
      # Add this line for each item returned in `get_weights_and_quantizers`
      # , in the same order
      layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
      # Add this line for each item returned in `get_activations_and_quantizers`
      # , in the same order.
      layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
      return []

    def get_config(self):
      return {}

### Quantize vgg16 ALL LAYERS

In [101]:
quantize_annotate_layer = tfmot.quantization.keras.quantize_annotate_layer
quantize_annotate_model = tfmot.quantization.keras.quantize_annotate_model
quantize_scope = tfmot.quantization.keras.quantize_scope

# Define the model architecture.
model_vgg16_quant = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=(32, 32, 3)),
  tf.keras.layers.Reshape(target_shape=(32, 32, 3)),
  #block-1
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-2
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-3
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3),padding="same", activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-4
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

  #block-5
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3),padding="same", activation='relu'), LayerQuantizeConfig()),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
  

  tf.keras.layers.Flatten(),
  quantize_annotate_layer(tf.keras.layers.Dense(4096), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Dense(4096), LayerQuantizeConfig()),
  quantize_annotate_layer(tf.keras.layers.Dense(10), LayerQuantizeConfig())
])

# `quantize_apply` requires mentioning `DefaultDenseQuantizeConfig` with `quantize_scope`
# as well as the custom Keras layer.
with quantize_scope(
  {'LayerQuantizeConfig': LayerQuantizeConfig}):
  # Use `quantize_apply` to actually make the model quantization aware.
  quant_aware_model_vgg16 = tfmot.quantization.keras.quantize_apply(model_vgg16_quant)
  quant_aware_model_vgg16.summary()

Model: "sequential_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_24 (Reshape)         (None, 32, 32, 3)         0         
_________________________________________________________________
quant_conv2d_299 (QuantizeWr (None, 32, 32, 64)        1797      
_________________________________________________________________
quant_conv2d_300 (QuantizeWr (None, 32, 32, 64)        36933     
_________________________________________________________________
max_pooling2d_115 (MaxPoolin (None, 16, 16, 64)        0         
_________________________________________________________________
quant_conv2d_301 (QuantizeWr (None, 16, 16, 128)       73861     
_________________________________________________________________
quant_conv2d_302 (QuantizeWr (None, 16, 16, 128)       147589    
_________________________________________________________________
max_pooling2d_116 (MaxPoolin (None, 8, 8, 128)       

### Tarining on quantize vgg16

In [112]:
from keras.optimizers import Adam
from keras.optimizers import SGD

#opt = SGD(learning_rate=0.1)
# Compile the model
quant_aware_model_vgg16.compile(optimizer='sgd', loss=tf.keras.losses.categorical_crossentropy,metrics=['accuracy'])

# Fit data to model
quant_aware_model_vgg16.fit(trainX[:200], trainy[:200],
          batch_size=50,
          epochs=1,
          validation_split=0.1,
          )




<tensorflow.python.keras.callbacks.History at 0x7fd7f9d5a910>

### Compare baseline vgg16 and quantize vgg16 Train Accuracy

In [103]:
data_size = 200

baseline_model_accuracy = model_vgg16.evaluate(
  trainX[:data_size], trainy[:data_size], verbose=1)

q_aware_model_accuracy = quant_aware_model_vgg16.evaluate(
  trainX[:data_size], trainy[:data_size], verbose=1)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Quant test accuracy:', q_aware_model_accuracy)

Baseline test accuracy: [2.2977724075317383, 0.14000000059604645]
Quant test accuracy: [3.414229393005371, 0.0949999988079071]


### Compare baseline vgg16 and quantize vgg16 Test Accuracy

In [105]:
baseline_model_accuracy = model_vgg16.evaluate(
  testX, testy, verbose=1)

q_aware_model_accuracy = quant_aware_model_vgg16.evaluate(
  testX, testy, verbose=1)



### Converting to tfLite for uploading microntroller

In [None]:
import tempfile
converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model_vgg16)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

quantized_tflite_model = converter.convert()
# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model_vgg16)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
_, float_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')

with open(quant_file, 'wb') as f:
  f.write(quantized_tflite_model)

with open(float_file, 'wb') as f:
  f.write(float_tflite_model)

print("Float model in Mb:", os.path.getsize(float_file) / float(2**20))
print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))



INFO:tensorflow:Assets written to: /tmp/tmpx_un2xb8/assets


INFO:tensorflow:Assets written to: /tmp/tmpx_un2xb8/assets
