In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

In [2]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

In [3]:
train_images.shape

(50000, 32, 32, 3)

In [4]:
test_images.shape

(10000, 32, 32, 3)

In [5]:
def calc_MACs_in_Conv(K, C_in, C_out, H_out, W_out): 
    return (K**2) * C_in * H_out * W_out * C_out 

In [6]:
# first conv
conv1 = calc_MACs_in_Conv(3, 3, 32, 30, 30)

In [7]:
# second conv
conv2 = calc_MACs_in_Conv(3, 32, 32, 28, 28)

In [8]:
def calc_MACs_in_FC(in_n, out_n): 
    return in_n * out_n 

In [9]:
# first fc
fc1 = calc_MACs_in_FC(6272, 128)

In [10]:
# second fc
fc2 = calc_MACs_in_FC(128, 10)

In [11]:
conv1 + conv2 + fc1 + fc2

8807040

In [12]:
# accuracy %71

In [13]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), use_bias=False,padding='valid', activation='relu', input_shape=(32,32,3)))
# model.add(layers.BatchNormalization())
model.add(layers.Conv2D(32, (3,3), use_bias=False,padding='valid', activation='relu'))
# model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.Dropout(0.3))
model.add(layers.Flatten())
model.add(layers.Dense(128, use_bias=False,activation='relu'))
# model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, use_bias=False,activation='softmax'))    # num_classes = 10

In [14]:
import tensorflow_model_optimization as tfmot

quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
quantize_layer (QuantizeLaye (None, 32, 32, 3)         3         
_________________________________________________________________
quant_conv2d (QuantizeWrappe (None, 30, 30, 32)        931       
_________________________________________________________________
quant_conv2d_1 (QuantizeWrap (None, 28, 28, 32)        9283      
_________________________________________________________________
quant_max_pooling2d (Quantiz (None, 14, 14, 32)        1         
_________________________________________________________________
quant_dropout (QuantizeWrapp (None, 14, 14, 32)        1         
_________________________________________________________________
quant_flatten (QuantizeWrapp (None, 6272)              1         
_________________________________________________________________
quant_dense (QuantizeWrapper (None, 128)               8

In [10]:
history = q_aware_model.fit(train_images, train_labels, epochs=20, batch_size=128,
                    validation_data=(test_images, test_labels))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [11]:
# baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0)
q_aware_model_accuracy = q_aware_model.evaluate(test_images, test_labels, verbose=0)

# print('Baseline test accuracy:', baseline_model_accuracy)
print('Quant test accuracy:', q_aware_model_accuracy)

Quant test accuracy: [1.7958474159240723, 0.6626999974250793]


In [12]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

quantized_tflite_model = converter.convert()

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: /tmp/tmp9rsvqc37/assets


In [14]:
import numpy as np

def evaluate_model(interpreter):
  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the "test" dataset.
  prediction_digits = []
  for i, test_image in enumerate(test_images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == test_labels).mean()
  return accuracy

In [15]:
interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter)

print('Quant TFLite test_accuracy:', test_accuracy)
print('Quant TF test accuracy:', q_aware_model_accuracy)

Evaluated on 0 results so far.
Evaluated on 1000 results so far.
Evaluated on 2000 results so far.
Evaluated on 3000 results so far.
Evaluated on 4000 results so far.
Evaluated on 5000 results so far.
Evaluated on 6000 results so far.
Evaluated on 7000 results so far.
Evaluated on 8000 results so far.
Evaluated on 9000 results so far.


Quant TFLite test_accuracy: 0.1
Quant TF test accuracy: [1.7958474159240723, 0.6626999974250793]
