## Basic Imports

In [5]:
! pip install -q tensorflow-model-optimization
import time 
import tempfile
import os

import tensorflow as tf
import numpy as np
from tensorflow import keras
%load_ext tensorboard

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Creating Simple Model 

In [7]:
with tf.device('/GPU:0'):
    # Load MNIST dataset
    mnist = tf.keras.datasets.mnist
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Normalize the input image so that each pixel value is between 0 to 1.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

In [20]:
# Load MNIST dataset
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input image so that each pixel value is between 0 to 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

In [23]:
model = tf.keras.Sequential([
  keras.layers.InputLayer(input_shape=(28, 28)),
  keras.layers.Reshape(target_shape=(28, 28, 1)),
  keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"),
  keras.layers.LeakyReLU(alpha=0.2),
  keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding="same"),
  keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
  keras.layers.Flatten(),
  keras.layers.Dense(10),
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(
  train_images,
  train_labels,
  batch_size=500,
  epochs=3,
  validation_split=0.1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f00b8561650>

In [None]:
!mkdir -p saved_model
model.save('saved_model/SimpleModel.h5') 

Simple Model Average time Result:


In [None]:
S_results=[]
for i in range(50):
  start_time = time.time()
  model.predict(test_images)
  endtime=time.time()
  rr = endtime- start_time
  S_results.append(rr)
  i+=1

KeyboardInterrupt: ignored

In [None]:
print(S_results)

avg = sum(S_results)/len(S_results)
print("Average time taken for prediction :  "+avg)


[2.755086660385132, 2.5493507385253906, 2.538020610809326, 2.5196452140808105, 2.4089927673339844, 2.415070056915283, 2.545689105987549, 2.412343978881836, 2.5223662853240967, 2.488382339477539, 2.5396316051483154, 2.5611538887023926, 2.5675880908966064, 2.498210906982422, 2.4723033905029297, 2.4683926105499268, 2.5220909118652344, 2.481600761413574, 2.442011833190918, 2.5514285564422607, 2.474669933319092, 2.455540418624878, 2.55790114402771, 2.537698984146118, 2.4864423274993896, 2.457925796508789, 2.5213937759399414, 2.5351755619049072, 2.551759719848633, 2.5363664627075195, 2.5418848991394043, 2.48974609375, 2.5163612365722656, 2.5045766830444336, 2.4725427627563477, 2.5592498779296875, 2.5858469009399414, 2.6143343448638916, 2.5054686069488525, 2.5148749351501465, 2.5692832469940186, 2.4972281455993652, 2.505478858947754, 2.5301096439361572, 2.593036651611328, 2.4699435234069824, 2.476102113723755, 2.499424934387207, 2.5535120964050293, 2.496169328689575]
2.517388186454773


## Pruning the Simple Model

In [None]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 128
epochs = 2
validation_split = 0.1 # 10% of training set will be used for validation set. 

num_images = train_images.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Define model for pruning.
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning.summary()



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
prune_low_magnitude_reshape  (None, 28, 28, 1)         1         
_________________________________________________________________
prune_low_magnitude_conv2d ( (None, 14, 14, 64)        1218      
_________________________________________________________________
prune_low_magnitude_leaky_re (None, 14, 14, 64)        1         
_________________________________________________________________
prune_low_magnitude_max_pool (None, 14, 14, 64)        1         
_________________________________________________________________
prune_low_magnitude_conv2d_1 (None, 7, 7, 128)         147586    
_________________________________________________________________
prune_low_magnitude_flatten  (None, 6272)              1         
_________________________________________________________________
prune_low_magnitude_dense (P (None, 10)                1

Fine tune with pruning ..
tfmot.sparsity.keras.UpdatePruningStep is required during training, and tfmot.sparsity.keras.PruningSummaries provides logs for tracking progress and debugging.

In [None]:
logdir = tempfile.mkdtemp()

callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(
  train_images,
  train_labels,
  epochs=4,
  validation_split=0.1,
  callbacks=callbacks
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7fcc30c41fd0>

In [None]:
P_S_results=[]
for i in range(50):
  start_time = time.time()
  model_for_pruning.predict(test_images)
  endtime=time.time()
  rr = endtime- start_time
  P_S_results.append(rr)
  i+=1


In [None]:
print(P_S_results)

avg = sum(P_S_results)/len(P_S_results)
print("Average time taken for prediction :  "+avg)


## TFLite Post Quantization


In [None]:
tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
tf_lite_converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = tf_lite_converter.convert()
tflite_model_name = 'TFlite_post_quantModel8bit'
open(tflite_model_name, "wb").write(tflite_model)

In [None]:
#Loading and checking the required parameters of tflite model
interpreter = tf.lite.Interpreter(model_path = tflite_model_name)
interpreter.allocate_tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input Shape:", input_details[0]['shape'])
print("Input Type:", input_details[0]['dtype'])
print("Output Shape:", output_details[0]['shape'])
print("Output Type:", output_details[0]['dtype'])

In [1]:
sum([1,2,3])

6

In [None]:
#doing some necessary preprocessing on input testing data 
print(test_images.dtype)
X_test_numpy = np.array(test_images, dtype=np.float32)
y_test_numpy =np.array(test_labels, dtype=np.float32)
print(X_test_numpy.shape,y_test_numpy.shape)

In [None]:
#loading and testing saved TFLite model
pred = []
pred_Time =[]
for i in range (len(y_test_numpy)):  
  start_time= time.time()
  interpreter = tf.lite.Interpreter(model_path = tflite_model_name)
  interpreter.allocate_tensors()
  input_details = interpreter.get_input_details()
  output_details = interpreter.get_output_details()
  # X_test_numpy=X_test_numpy.reshape(1, 1025, 1)
  inp = X_test_numpy[i]
  inp = inp.reshape(1 ,28, 28)
  # print(inp.shape)
  interpreter.set_tensor(input_details[0]['index'],inp )
  interpreter.invoke()
  tflite_model_predictions = interpreter.get_tensor(output_details[0]['index'])

  # print("Prediction results shape:", tflite_model_predictions.shape)
  prediction_classes = np.argmax(tflite_model_predictions, axis=1)
  pred.append(prediction_classes)
  endtime=time.time()
  tt = endtime-start_time
  pred_Time.append(tt)




In [34]:
print(pred_Time)

avg = sum(pred_Time)/len(pred_Time)
print("Average time taken for prediction :  "+avg)

[0.012625694274902344, 0.012302637100219727, 0.01238393783569336, 0.012646913528442383, 0.012439727783203125, 0.012135028839111328, 0.01224374771118164, 0.013115167617797852, 0.012178421020507812, 0.012146472930908203, 0.014551877975463867, 0.015870094299316406, 0.012391090393066406, 0.01239013671875, 0.012281656265258789, 0.012552976608276367, 0.012296915054321289, 0.012116432189941406, 0.012193441390991211, 0.012282609939575195, 0.012417793273925781, 0.012485027313232422, 0.012436151504516602, 0.012309789657592773, 0.012247085571289062, 0.012286663055419922, 0.012106657028198242, 0.01242971420288086, 0.012628793716430664, 0.01233673095703125, 0.0122833251953125, 0.012148618698120117, 0.012144804000854492, 0.012182474136352539, 0.012216567993164062, 0.0122222900390625, 0.012102365493774414, 0.012454986572265625, 0.012613296508789062, 0.012382030487060547, 0.014008045196533203, 0.012449026107788086, 0.012398242950439453, 0.012163162231445312, 0.012228250503540039, 0.012339591979980469,