In [1]:
import tensorflow as tf
from keras.datasets import cifar10
import numpy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input
import keras,os
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten
import numpy as np

### Loading CIFAR-10 dataset

In [2]:
(trainX, trainy), (testX, testy) = cifar10.load_data()

In [3]:
# print to make sure we have the correct shapes + number of images for training
print("number of train pictures:", trainX.shape)
print("number of trained picture values:", trainy.shape)
# divide by 255 to make [0,255] into [0,1] + print to make sure!
trainy = tf.keras.utils.to_categorical(trainy,10)
testy = tf.keras.utils.to_categorical(testy,10)
trainX = trainX/255.0
testX = testX/255.0

number of train pictures: (50000, 32, 32, 3)
number of trained picture values: (50000, 1)


### VGG-16 model

In [4]:
# example of loading the vgg16 model
from keras.applications.vgg16 import VGG16
# load model
model = VGG16()
# summarize the model
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

### VGG-16 Clone Without Quantization

In [5]:
model_wq = tf.keras.Sequential()
#block-1
model_wq.add(Conv2D(input_shape=(32,32,3),
                    filters=64,kernel_size=(3,3),
                    padding="same", 
                    activation="relu",
                    name='block1_conv1'))
model_wq.add(Dropout(0.3))
model_wq.add(Conv2D(filters=64,
                    kernel_size=(3,3),
                    padding="same", 
                    activation="relu",
                    name='block1_conv2'))
model_wq.add(Dropout(0.4))
model_wq.add(MaxPool2D(pool_size=(2,2), strides=(2,2), name='block1_pool'))


#block-2
model_wq.add(Conv2D(filters=128, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block2_conv1'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=128, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block2_conv2'))
model_wq.add(Dropout(0.4))
model_wq.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block2_pool'))

#block-3
model_wq.add(Conv2D(filters=256, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block3_conv1'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=256, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block3_conv2'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=256, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block3_conv3'))
model_wq.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block3_pool'))

#block-4
model_wq.add(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block4_conv1'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block4_conv2'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block4_conv3'))
model_wq.add(Dropout(0.4))
model_wq.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block4_pool'))

#block-5
model_wq.add(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block5_conv1'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block5_conv2'))
model_wq.add(Dropout(0.4))
model_wq.add(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block5_conv3'))
model_wq.add(Dropout(0.3))
model_wq.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block5_pool'))


#fc1, fc2 and predictions
model_wq.add(Dropout(0.5))
model_wq.add(Flatten(name='flatten'))
model_wq.add(Dense(units=4096,activation="relu",name='fc1'))
model_wq.add(Dense(units=4096,activation="relu",name='fc2'))

model_wq.add(Dropout(0.5))
model_wq.add(Dense(units=10, activation="softmax",name='predictions'))

model_wq.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
dropout (Dropout)            (None, 32, 32, 64)        0         
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
dropout_2 (Dropout)          (None, 16, 16, 128)       0

### Tranining withgout quantization vgg16

In [6]:
from keras.optimizers import Adam
from keras.optimizers import SGD

opt = SGD(lr=0.1)
# Compile the model
model_wq.compile(optimizer=opt, loss=tf.keras.losses.categorical_crossentropy,metrics=['accuracy'])

# Fit data to model
model_wq.fit(trainX, trainy,
          batch_size=50,
          epochs=15,
          validation_split=0.2)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x10f66b580>

### Loss and Accuracy without quantization

In [7]:
score = model_wq.evaluate(testX, testy, verbose=1)
print("Test loss {:.4f}, accuracy {:.2f}%".format(score[0], score[1] * 100))

Test loss 2.3033, accuracy 10.00%


### Defining the quantization config
`DefaultDenseQuantizeConfig` is 8 bit

`ModifiedDenseQuantizeConfig` is 4 bit

`UltraDenseQuantizeConfig` is 2 bit

In [8]:
import tensorflow_model_optimization as tfmot

LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

class DefaultDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
      return [(layer.kernel, LastValueQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False))]

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
      return [(layer.activation, MovingAverageQuantizer(num_bits=8, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
      # Add this line for each item returned in `get_weights_and_quantizers`
      # , in the same order
      layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
      # Add this line for each item returned in `get_activations_and_quantizers`
      # , in the same order.
      layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
      return []

    def get_config(self):
      return {}

class ModifiedDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    def get_weights_and_quantizers(self, layer):
      return [(layer.kernel, LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False))]

    def get_activations_and_quantizers(self, layer):
      return [(layer.activation, MovingAverageQuantizer(num_bits=4, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
      # Add this line for each item returned in `get_weights_and_quantizers`
      # , in the same order
      layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
      # Add this line for each item returned in `get_activations_and_quantizers`
      # , in the same order.
      layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
      return []

    def get_config(self):
      return {}

class UltraDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    def get_weights_and_quantizers(self, layer):
      return [(layer.kernel, LastValueQuantizer(num_bits=2, symmetric=True, narrow_range=False, per_axis=False))]

    def get_activations_and_quantizers(self, layer):
      return [(layer.activation, MovingAverageQuantizer(num_bits=2, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
      # Add this line for each item returned in `get_weights_and_quantizers`
      # , in the same order
      layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
      # Add this line for each item returned in `get_activations_and_quantizers`
      # , in the same order.
      layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
      return []

    def get_config(self):
      return {}


### Quantizing vgg-16
`ModifiedDenseQuantizeConfig` is 4 bit

In [9]:

annotate = tfmot.quantization.keras.quantize_annotate_layer

quant_vgg16 = tf.keras.Sequential()
    # Only annotated layers will be quantized
    
#block-1
quant_vgg16.add(annotate(Conv2D(input_shape=(32,32,3),
                    filters=64,kernel_size=(3,3),
                    padding="same", 
                    activation="relu",
                    name='block1_conv1'), quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.3))   

quant_vgg16.add(annotate(Conv2D(filters=64,
                    kernel_size=(3,3),
                    padding="same", 
                    activation="relu",
                    name='block1_conv2'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(MaxPool2D(pool_size=(2,2), strides=(2,2), name='block1_pool'))


#block-2
quant_vgg16.add(annotate(Conv2D(filters=128, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block2_conv1'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=128, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block2_conv2'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block2_pool'))

#block-3
quant_vgg16.add(annotate(Conv2D(filters=256, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block3_conv1'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=256, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block3_conv2'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=256, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block3_conv3'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block3_pool'))

#block-4
quant_vgg16.add(annotate(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block4_conv1'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block4_conv2'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block4_conv3'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block4_pool'))

#block-5
quant_vgg16.add(annotate(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block5_conv1'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block5_conv2'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(annotate(Conv2D(filters=512, 
                    kernel_size=(3,3), 
                    padding="same", 
                    activation="relu",
                    name='block5_conv3'),
                    quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dropout(0.4))
quant_vgg16.add(MaxPool2D(pool_size=(2,2),strides=(2,2), name='block5_pool'))

#fc1, fc2 and predictions
quant_vgg16.add(Dropout(0.5))
quant_vgg16.add(annotate(Flatten(name='flatten')))
quant_vgg16.add(annotate(Dense(units=4096,activation="relu",name='fc1'), quantize_config=ModifiedDenseQuantizeConfig()))

quant_vgg16.add(Dropout(0.5))
quant_vgg16.add(annotate(Dense(units=4096,activation="relu",name='fc2'), quantize_config=ModifiedDenseQuantizeConfig()))
quant_vgg16.add(Dense(units=10, activation="softmax",name='predictions'))    
  

quantize_scope = tfmot.quantization.keras.quantize_scope

# `quantize_apply` requires mentioning `DefaultDenseQuantizeConfig` with `quantize_scope`
# as well as the custom Keras layer.
with quantize_scope(
  {'DefaultDenseQuantizeConfig': DefaultDenseQuantizeConfig,
  'ModifiedDenseQuantizeConfig':ModifiedDenseQuantizeConfig,
  'UltraDenseQuantizeConfig':UltraDenseQuantizeConfig}):
  # Use `quantize_apply` to actually make the model quantization aware.
  vgg_quant_model = tfmot.quantization.keras.quantize_apply(quant_vgg16)
    
quant_vgg16.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
quantize_annotate (QuantizeA (None, 32, 32, 64)        1792      
_________________________________________________________________
dropout_14 (Dropout)         (None, 32, 32, 64)        0         
_________________________________________________________________
quantize_annotate_1 (Quantiz (None, 32, 32, 64)        36928     
_________________________________________________________________
dropout_15 (Dropout)         (None, 32, 32, 64)        0         
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
quantize_annotate_2 (Quantiz (None, 16, 16, 128)       73856     
_________________________________________________________________
dropout_16 (Dropout)         (None, 16, 16, 128)      

In [10]:
from keras.optimizers import Adam
from keras.optimizers import SGD

opt = SGD(lr=0.1)
# Compile the model
quant_vgg16.compile(optimizer=opt, loss=tf.keras.losses.categorical_crossentropy,metrics=['accuracy'])

# Fit data to model
quant_vgg16.fit(trainX, trainy,
          batch_size=50,
          epochs=15,
          validation_split=0.2)

converter = tf.lite.TFLiteConverter.from_keras_model(quant_vgg16)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

quantized_tflite_model = converter.convert()

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15




INFO:tensorflow:Assets written to: /var/folders/1q/ns7tqnvx55lfc3nl7vf90tth0000gn/T/tmp2xhx82j8/assets


INFO:tensorflow:Assets written to: /var/folders/1q/ns7tqnvx55lfc3nl7vf90tth0000gn/T/tmp2xhx82j8/assets


### Loss and Accuracy with quantization

In [16]:
quant_score = quant_vgg16.evaluate(testX, testy, verbose=1)
print("Test loss {:.4f}, accuracy {:.2f}%".format(score[0], score[1] * 100))

Test loss 2.3029, accuracy 10.00%


### Quantization vs Without quantization Test loss
#### note that the model without quantization is 528 MB and the quantized model is 32 MB (16x decrease)

In [13]:
import tempfile
# Create float TFLite model.
model_wq = tf.lite.TFLiteConverter.from_keras_model(model)
model_wq = model_wq.convert()
print('converted')
# Measure sizes of models.
_, float_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')
print('files')
with open(quant_file, 'wb') as f:
  f.write(quantized_tflite_model)
print('done')
with open(float_file, 'wb') as f:
  f.write(model_wq)

print("Float model in Mb:", os.path.getsize(float_file) / float(2**20))
print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))

INFO:tensorflow:Assets written to: /var/folders/1q/ns7tqnvx55lfc3nl7vf90tth0000gn/T/tmpdricefb_/assets


INFO:tensorflow:Assets written to: /var/folders/1q/ns7tqnvx55lfc3nl7vf90tth0000gn/T/tmpdricefb_/assets


converted
files
done
Float model in Mb: 527.8010864257812
Quantized model in Mb: 32.207611083984375


## Evaluate the test loss for quantized and unquantized models

In [19]:
print('Test loss for quantized model is ',quant_score[0])
print('Test loss for normal (unquantized) model is ',score[0])
quant_percent = (quant_score[0] - score[0])/ score[0]
print('Percent change in quantized model test loss and normal model test loss is ', quant_percent, '%')

Test loss for quantized model is  2.3028557300567627
Test loss for normal (unquantized) model is  2.3028557300567627
Percent change in quantized model test loss and normal model test loss is  0.0 %


## Save quantized model to upload on microcontroller

In [20]:
open("cifar_quant.tflite", "wb").write(quantized_tflite_model)

33772128