### Import Packages

In [10]:
# Necessary imports
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [11]:
import pathlib

### Loading Dataset

In [12]:
# Pull Fashion-MNIST through the Keras dataset helper and unpack both splits.
mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
print(f"Train Image shape: {train_images.shape} Test Image shape: {test_images.shape}")

Train Image shape: (60000, 28, 28) Test Image shape: (10000, 28, 28)


In [13]:
# Human-readable class names; a name's position in the list is its integer label.
fashion_mnist_labels = [
  "T-shirt/top",   # 0
  "Trouser",       # 1
  "Pullover",      # 2
  "Dress",         # 3
  "Coat",          # 4
  "Sandal",        # 5
  "Shirt",         # 6
  "Sneaker",       # 7
  "Bag",           # 8
  "Ankle boot",    # 9
]

In [14]:
# Normalize the images to the [0, 1] range.
# The dtype guard makes this cell idempotent: once an array has been divided
# it is floating point, so re-running the cell no longer scales it a second time.
# (Assumes the freshly loaded arrays are integer-typed pixel data - confirm
# with `train_images.dtype` right after loading.)
if np.issubdtype(train_images.dtype, np.integer):
  train_images = train_images / 255.0
if np.issubdtype(test_images.dtype, np.integer):
  test_images = test_images / 255.0

### Regular CNN1 with Conv2D Model Architecture

In [6]:
# Baseline CNN: one 3x3 Conv2D (12 filters), 2x2 max-pooling, then a linear
# 10-unit head. The Dense layer has no activation, so the model emits logits.
model = keras.Sequential()
model.add(keras.layers.InputLayer(input_shape=(28, 28)))
# Conv2D needs an explicit channel axis, so 28x28 becomes 28x28x1.
model.add(keras.layers.Reshape(target_shape=(28, 28, 1)))
model.add(keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(10))

# Print the layer-by-layer shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 12)        120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 12)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2028)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                20290     
Total params: 20,410
Trainable params: 20,410
Non-trainable params: 0
_________________________________________________________________


### CNN1 with separable filters - Operation optimization

In [7]:
# Same topology as the baseline CNN, with the Conv2D layer swapped for a
# SeparableConv2D layer; the head is again a linear 10-unit Dense (logits).
sep_model = keras.Sequential()
sep_model.add(keras.layers.InputLayer(input_shape=(28, 28)))
# Add the channel axis the convolution layer expects: 28x28 -> 28x28x1.
sep_model.add(keras.layers.Reshape(target_shape=(28, 28, 1)))
sep_model.add(keras.layers.SeparableConv2D(filters=12, kernel_size=(3, 3), activation='relu'))
sep_model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
sep_model.add(keras.layers.Flatten())
sep_model.add(keras.layers.Dense(10))

# Print the layer-by-layer shapes and parameter counts.
sep_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
separable_conv2d (SeparableC (None, 26, 26, 12)        33        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 12)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2028)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                20290     
Total params: 20,323
Trainable params: 20,323
Non-trainable params: 0
_________________________________________________________________


### Train the regular CNN1 with Conv2D: Train Model using MNIST Fashion dataset - Base model

In [8]:
# from_logits=True because the model's final Dense layer has no activation.
model.compile(
  optimizer='adam',
  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'],
)

In [9]:
# Train the baseline CNN; the test split is passed as validation data, so the
# per-epoch validation metrics are computed on the same images used for the
# final test-accuracy figure.
model.fit(
  x=train_images,
  y=train_labels,
  batch_size=64,
  epochs=10,
  validation_data=(test_images, test_labels),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f42de01e240>

In [10]:
# Persist the trained baseline CNN to the working directory (.h5 file).
model.save('1_base_fashion_mnist_model.h5')

In [11]:
# Score the baseline CNN on the test split; with the compile settings used
# here, index 0 of the result is the loss and index 1 the accuracy metric.
score = model.evaluate(x=test_images, y=test_labels, verbose=0)

# Report the test accuracy (leading blank line kept for readability).
print(f"\n Test accuracy: {score[1]}")


 Test accuracy: 0.8943


### Train the Separable CNN1 using MNIST Fashion dataset

In [12]:
# from_logits=True because the model's final Dense layer has no activation.
sep_model.compile(
  optimizer='adam',
  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'],
)

In [13]:
# Train the separable-conv CNN with the same batch size and epoch count as the
# baseline; the test split again doubles as validation data.
sep_model.fit(
  x=train_images,
  y=train_labels,
  batch_size=64,
  epochs=10,
  validation_data=(test_images, test_labels),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4290fa16a0>

In [14]:
# Persist the trained separable-conv CNN; this .h5 file is the input to every
# later load_model / TFLite conversion call in the notebook.
sep_model.save('2_fashion_mnist_model_sperable.h5')

### Evaluate and find the model load and unit inference time for the above .h5 separable model (2_fashion_mnist_model_sperable.h5)

In [15]:
# Score the separable-conv CNN on the test split; index 1 of the result is
# the accuracy metric requested at compile time.
score = sep_model.evaluate(x=test_images, y=test_labels, verbose=0)

# Report the test accuracy (leading blank line kept for readability).
print(f"\n Test accuracy: {score[1]}")


 Test accuracy: 0.8578


In [15]:
import time
# First test image with a leading batch axis, used as the single-sample input
# for the unit-inference timing.
data = test_images[0].reshape((1, 28, 28))
def orig_model_infer_time(model_path='./2_fashion_mnist_model_sperable.h5', sample=None):
  """Time loading a saved Keras model and running one prediction with it.

  Args:
    model_path: Path of the saved Keras model to load. Defaults to the
      separable-conv model saved earlier in this notebook.
    sample: Batched input passed to `model.predict`. Defaults to the
      module-level `data` array (one test image with a batch axis).

  Returns:
    dict with two durations in seconds:
      'Time to load model and then infer': load_model + predict.
      'Time to only infer': the predict call alone.
    Both durations end at the same timestamp, taken right after predict.
    NOTE(review): this is the first predict call on a freshly loaded model,
    so it presumably includes one-time setup cost - confirm before comparing
    against steady-state numbers.
  """
  if sample is None:
    sample = data

  start_time_full = time.time()
  model = tf.keras.models.load_model(model_path, custom_objects=None, compile=True)
  start_time_infer = time.time()
  model.predict(sample)
  # Single end timestamp so the two durations are mutually consistent.
  end_time = time.time()

  results = {'Time to load model and then infer': (end_time - start_time_full),
             'Time to only infer': (end_time - start_time_infer)}
  return results


In [16]:
def orig_model_infer_time_testset(model_path='./2_fashion_mnist_model_sperable.h5',
                                  images=None, labels=None):
  """Time loading a saved Keras model and evaluating it on a labelled set.

  Args:
    model_path: Path of the saved Keras model to load. Defaults to the
      separable-conv model saved earlier in this notebook.
    images: Inputs passed to `model.evaluate`. Defaults to the module-level
      `test_images`.
    labels: Targets passed to `model.evaluate`. Defaults to the module-level
      `test_labels`.

  Returns:
    dict with one entry, 'Time to load model and infer for full test set',
    holding the seconds spent on load_model plus evaluate.
  """
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels

  start_time_testset = time.time()
  model = tf.keras.models.load_model(model_path, custom_objects=None, compile=True)
  # Only the elapsed time matters here; the returned metrics are discarded.
  model.evaluate(images, labels, verbose=0)
  results1 = { 'Time to load model and infer for full test set': (time.time() - start_time_testset)}

  return results1

In [17]:
# Time model load plus a single prediction for the saved Keras (.h5) model.
orig_model_infer_time()

{'Time to load model and then infer': 0.26890087127685547,
 'Time to only infer': 0.0513150691986084}

In [18]:
# Time model load plus evaluation over the whole test split for the saved Keras (.h5) model.
orig_model_infer_time_testset()

{'Time to load model and infer for full test set': 1.7964997291564941}

### Convert the above .h5 separable model (2_fashion_mnist_model_sperable.h5) into TFLite (3_fashion_mnist_model_sperable_tflite.tflite)

In [19]:
def ConvertTFLite(model_path, filename):
  """Convert a saved Keras model to a TFLite file under tflite_models/.

  No converter optimizations are set, so this is a plain conversion.

  Args:
    model_path: Path of the saved Keras model to load.
    filename: Output file name without extension; ".tflite" is appended.

  Returns:
    A success message containing the output path, or, if anything raises an
    Exception along the way, that exception's message as a string. The
    function therefore reports failures through its return value.
  """
  try:
    keras_model = tf.keras.models.load_model(model_path)
    flatbuffer = tf.lite.TFLiteConverter.from_keras_model(keras_model).convert()

    # The output directory is only created once conversion has succeeded.
    out_dir = pathlib.Path("tflite_models/")
    out_dir.mkdir(exist_ok=True, parents=True)
    out_file = out_dir.joinpath(filename + ".tflite")
    out_file.write_bytes(flatbuffer)
  except Exception as e:
    return str(e)

  return f'Converted to TFLite, path {out_file}'


In [20]:
# Convert the saved separable-conv Keras model to a TFLite file (no optimizations applied).
ConvertTFLite('./2_fashion_mnist_model_sperable.h5','3_fashion_mnist_model_sperable_tflite')

'Converted to TFLite, path tflite_models/3_fashion_mnist_model_sperable_tflite.tflite'

### Convert the above .h5 separable model (2_fashion_mnist_model_sperable.h5) into an integer-with-float-fallback quantized model (4_fashion_mnist_seperable_Integer_float_model.tflite)

In [20]:
def Quant_int_with_float(model_name, filename):
  """Convert a saved Keras model to TFLite with Optimize.DEFAULT enabled.

  NOTE(review): no representative dataset is configured here; per the TFLite
  post-training quantization guide that combination looks like dynamic-range
  quantization rather than integer-with-float-fallback - confirm the intent.

  Args:
    model_name: Path of the saved Keras model to load.
    filename: Output file name without extension; '.tflite' is appended.

  Returns:
    A success message containing the output path, or the message of any
    Exception raised along the way (failures are returned, not raised).
  """
  try:
    keras_model = tf.keras.models.load_model(model_name)
    quant_converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    quant_converter.optimizations = [tf.lite.Optimize.DEFAULT]
    quantized_bytes = quant_converter.convert()

    # Build the target name before touching the filesystem.
    target_name = filename + '.tflite'
    out_dir = pathlib.Path("tflite_models/")
    out_dir.mkdir(exist_ok=True, parents=True)

    out_file = out_dir.joinpath(target_name)
    out_file.write_bytes(quantized_bytes)
  except Exception as e:
    return str(e)

  return f'Converted - path {out_file}'

In [21]:
# Convert the saved separable-conv model with default TFLite optimizations enabled.
Quant_int_with_float('./2_fashion_mnist_model_sperable.h5', '4_fashion_mnist_seperable_Integer_float_model')

'Converted - path tflite_models/4_fashion_mnist_seperable_Integer_float_model.tflite'

### Convert the above .h5 separable model (2_fashion_mnist_model_sperable.h5) into a float16 quantized model (5_fashion_mnist_seperable_float16_model.tflite)

In [22]:
def Quant_float(model_name, filename):
  """Convert a saved Keras model to TFLite targeting float16.

  Enables Optimize.DEFAULT and restricts the converter's supported types to
  tf.float16 before converting.

  Args:
    model_name: Path of the saved Keras model to load.
    filename: Output file name without extension; '.tflite' is appended.

  Returns:
    A success message containing the output path, or the message of any
    Exception raised along the way (failures are returned, not raised).
  """
  try:
    keras_model = tf.keras.models.load_model(model_name)
    fp16_converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    fp16_converter.optimizations = [tf.lite.Optimize.DEFAULT]
    fp16_converter.target_spec.supported_types = [tf.float16]
    fp16_bytes = fp16_converter.convert()

    # Build the target name before touching the filesystem.
    target_name = filename + '.tflite'
    out_dir = pathlib.Path("tflite_models/")
    out_dir.mkdir(exist_ok=True, parents=True)

    out_file = out_dir.joinpath(target_name)
    out_file.write_bytes(fp16_bytes)
  except Exception as e:
    return str(e)

  return f'Converted - path {out_file}'

In [23]:
# Convert the saved separable-conv model with float16 as the supported type.
Quant_float('./2_fashion_mnist_model_sperable.h5', '5_fashion_mnist_seperable_float16_model')

'Converted - path tflite_models/5_fashion_mnist_seperable_float16_model.tflite'

### Convert the above .h5 separable model (2_fashion_mnist_model_sperable.h5) into an integer-only quantized model (6_fashion_mnist_seperable_integeronly_model.tflite)

In [24]:
def Quant_integer(model_name, filename, num_calibration_samples=100):
  """Convert a saved Keras model to an int8 TFLite model.

  Enables Optimize.DEFAULT, supplies a representative dataset drawn from the
  Fashion-MNIST training images (scaled to [0, 1]), restricts supported ops
  to TFLITE_BUILTINS_INT8 and requests int8 input/output tensors.

  NOTE(review): whether the resulting model really exposes int8 inputs
  depends on the converter version honouring `inference_input_type`; code
  that feeds the model should check the interpreter's input dtype.

  Args:
    model_name: Path of the saved Keras model to load.
    filename: Output file name without extension; '.tflite' is appended.
    num_calibration_samples: How many leading training images are fed to the
      converter for calibration. Defaults to 100.

  Returns:
    A success message containing the output path, or the message of any
    Exception raised along the way (failures are returned, not raised).
    An invalid `num_calibration_samples` is reported the same way.
  """
  try:
    if not isinstance(num_calibration_samples, int) or num_calibration_samples < 1:
      raise ValueError('num_calibration_samples must be a positive integer')

    model = tf.keras.models.load_model(model_name)
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]

    (train_pixels, _), _ = tf.keras.datasets.fashion_mnist.load_data()
    # Slice before casting so only the calibration images are converted to
    # float32, instead of the whole training set.
    calibration_images = tf.cast(train_pixels[:num_calibration_samples], tf.float32) / 255.0
    calibration_ds = tf.data.Dataset.from_tensor_slices(calibration_images).batch(1)

    def representative_data_gen():
      # One batch-of-one image per calibration step.
      for input_value in calibration_ds:
        yield [input_value]

    converter.representative_dataset = representative_data_gen

    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8  # or tf.uint8
    converter.inference_output_type = tf.int8  # or tf.uint8

    tflite_int_quant_model = converter.convert()

    filename = filename+'.tflite'
    tflite_models_dir = pathlib.Path("tflite_models/")
    tflite_models_dir.mkdir(exist_ok=True, parents=True)

    tflite_model_integeronly_file = tflite_models_dir/filename
    tflite_model_integeronly_file.write_bytes(tflite_int_quant_model)

    return f'Converted - path {tflite_model_integeronly_file}'

  except Exception as e:
    return str(e)

In [25]:
# Convert the saved separable-conv model using a representative dataset and int8-only ops.
Quant_integer('./2_fashion_mnist_model_sperable.h5', '6_fashion_mnist_seperable_integeronly_model')

'Converted - path tflite_models/6_fashion_mnist_seperable_integeronly_model.tflite'

In [21]:
import time

In [27]:
# Evaluate the model
def evaluate_model(interpreter, images=None, labels=None):
  """Measure accuracy and latency of a TFLite interpreter on a labelled set.

  Every image is run through the interpreter one at a time. When the
  interpreter reports an integer input dtype with a non-zero quantization
  scale, each float image is quantized with the reported scale/zero-point
  before being fed; otherwise it is fed as float32.

  Args:
    interpreter: TFLite interpreter (or compatible object) whose tensors are
      already allocated. Must provide get_input_details, get_output_details,
      set_tensor, invoke and get_tensor.
    images: Sequence of single, un-batched float inputs. Defaults to the
      module-level `test_images`.
    labels: Sequence of integer class ids aligned with `images`. Defaults to
      the module-level `test_labels`.

  Returns:
    Tuple `(result, results)`:
      result: {"Unit inference time ": seconds for one invoke() on the first
               image,
               "Time to load model and infer for testset ": seconds since
               this function was entered}. Despite the key's wording, model
               loading happens in the caller and is not part of this figure.
      results: {'time': seconds for the full pass over `images` including
                the accuracy computation, 'accuracy': fraction correct}.

  Raises:
    ValueError: if `images` is empty.
  """
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels
  if len(images) == 0:
    raise ValueError("evaluate_model needs at least one image")

  start_time = time.time()

  input_details = interpreter.get_input_details()[0]
  input_index = input_details["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Quantization parameters of the input tensor; (0.0, 0) means "not quantized".
  input_dtype = input_details.get("dtype", np.float32)
  input_scale, input_zero_point = input_details.get("quantization", (0.0, 0))
  needs_quantization = bool(np.issubdtype(input_dtype, np.integer) and input_scale)

  def to_model_input(image):
    # Add the batch axis, then convert to whatever dtype the model expects.
    batch = np.expand_dims(image, axis=0).astype(np.float32)
    if needs_quantization:
      limits = np.iinfo(input_dtype)
      batch = np.clip(np.round(batch / input_scale + input_zero_point),
                      limits.min, limits.max)
    return batch.astype(input_dtype)

  # Run predictions on every image in the dataset.
  prediction_digits = []
  for test_image in images:
    interpreter.set_tensor(input_index, to_model_input(test_image))

    # Run inference.
    interpreter.invoke()

    # get_tensor returns a copy, so no reference to interpreter-owned memory
    # is held across the next invoke(). argmax over the raw output needs no
    # de-quantization because a positive scale preserves ordering.
    output = interpreter.get_tensor(output_index)
    prediction_digits.append(np.argmax(output[0]))

  # Compare prediction results with ground truth labels to calculate accuracy.
  accurate_count = sum(
      1 for predicted, expected in zip(prediction_digits, labels)
      if predicted == expected)
  accuracy = accurate_count * 1.0 / len(prediction_digits)

  results = {'time': (time.time() - start_time),
             'accuracy': accuracy}

  # Time one isolated inference on the first image.
  interpreter.set_tensor(input_index, to_model_input(images[0]))
  start_time_infer = time.time()
  interpreter.invoke()
  unit_inference_seconds = time.time() - start_time_infer

  result = {"Unit inference time " : unit_inference_seconds,
            "Time to load model and infer for testset ": (time.time() - start_time)}

  return result, results


In [28]:
# Plain TFLite conversion: load it, allocate tensors, then measure accuracy/latency.
tflite_model_file = 'tflite_models/3_fashion_mnist_model_sperable_tflite.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()
evaluate_model(interpreter)

({'Unit inference time:': 6.461143493652344e-05,
  'Time to load model and infer for testset: ': 0.7214105129241943},
 {'time': 0.7213327884674072, 'accuracy': 0.8578})

In [29]:
# Default-optimization model: load it, allocate tensors, then measure accuracy/latency.
tflite_model_file = 'tflite_models/4_fashion_mnist_seperable_Integer_float_model.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()
evaluate_model(interpreter)

({'Unit inference time:': 0.0001232624053955078,
  'Time to load model and infer for testset: ': 0.7637655735015869},
 {'time': 0.7635705471038818, 'accuracy': 0.8594})

In [30]:
# float16 model: load it, allocate tensors, then measure accuracy/latency.
tflite_model_file = 'tflite_models/5_fashion_mnist_seperable_float16_model.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()
evaluate_model(interpreter)

({'Unit inference time:': 6.151199340820312e-05,
  'Time to load model and infer for testset: ': 0.7136926651000977},
 {'time': 0.713618278503418, 'accuracy': 0.8578})

In [31]:
# Integer-only model: load it, allocate tensors, then measure accuracy/latency.
tflite_model_file = 'tflite_models/6_fashion_mnist_seperable_integeronly_model.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()
evaluate_model(interpreter)

({'Unit inference time:': 0.0001125335693359375,
  'Time to load model and infer for testset: ': 1.227842092514038},
 {'time': 1.2277169227600098, 'accuracy': 0.862})