In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [5]:
import traceback
import contextlib

In [23]:
@contextlib.contextmanager
def options(options):
  """Context manager that applies TensorFlow optimizer options for a `with` block.

  The experimental optimizer options that are active on entry are read first,
  `options` is then applied, and on exit (including when the body raises) the
  previously read options are applied again.

  Args:
    options: value forwarded unchanged to
      `tf.config.optimizer.set_experimental_options`.
  """
  previous_options = tf.config.optimizer.get_experimental_options()
  tf.config.optimizer.set_experimental_options(options)
  try:
    yield
  finally:
    # Always re-apply what was captured on entry, even if the body failed.
    tf.config.optimizer.set_experimental_options(previous_options)

In [7]:
# Load the Fashion-MNIST train/test splits through the Keras datasets helper.
mnist = tf.keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Print the image-array shapes as a quick sanity check.
print("Train Image shape:", X_train.shape, "Test Image shape:", X_test.shape)

Train Image shape: (60000, 28, 28) Test Image shape: (10000, 28, 28)


In [8]:
# Normalize the images by dividing every pixel value by 255.0.
# NOTE: each array is rebound to its own scaled copy, so running this cell a
# second time divides by 255 again; reload the data before re-running it.
X_train = X_train / 255.0
X_test = X_test / 255.0

### Regular training using the model.fit function and the Fashion MNIST dataset - Base model

In [6]:
# Base model: add a channel axis, apply one 3x3 convolution with 12 filters,
# 2x2 max-pooling, flatten, and finish with a 10-unit Dense layer. The Dense
# layer has no activation, so the network outputs raw scores (the loss used
# for training is configured with from_logits=True).
model = keras.Sequential(
    [
        layers.InputLayer(input_shape=(28, 28)),
        layers.Reshape((28, 28, 1)),
        layers.Conv2D(12, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

# Print the per-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 12)        120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 12)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2028)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                20290     
Total params: 20,410
Trainable params: 20,410
Non-trainable params: 0
_________________________________________________________________


In [7]:
def fit_model():
    """Compile the global `model` and train it with `model.fit`.

    The model is compiled with the Adam optimizer, sparse categorical
    cross-entropy computed from logits, and an accuracy metric. It is then
    fitted on `(X_train, y_train)` for 10 epochs with a batch size of 64,
    using `(X_test, y_test)` as validation data. Nothing is returned.
    """
    logits_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer='adam', loss=logits_loss, metrics=['accuracy'])
    model.fit(
        X_train,
        y_train,
        batch_size=64,
        epochs=10,
        validation_data=(X_test, y_test),
    )


def timeit(func):
    """Wrap `func` so that every call prints how long it took.

    The wrapper forwards any positional and keyword arguments to `func`,
    prints ``Took: <seconds>`` with five decimals, and returns whatever `func`
    returned. The elapsed time is printed even when `func` raises.

    Args:
        func: the callable to time.

    Returns:
        A callable with the same name and docstring as `func`.
    """
    # Imported here so the decorator works regardless of which cell imported
    # `time`, and so the file needs no extra top-level import for `functools`.
    import functools
    import time

    @functools.wraps(func)
    def timed(*args, **kwargs):
        # perf_counter is monotonic, so the measurement cannot go negative or
        # jump if the system clock is adjusted while `func` runs.
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            print(f'Took: {(time.perf_counter() - start):.5f}')
    return timed

In [8]:
import time
# Train the base model once through the timing wrapper, which prints the
# elapsed time after training finishes.
timeit(fit_model)()

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Took: 112.74551


In [9]:
# Persist the base model to a .h5 file in the working directory.
model.save('1_base_fashion_mnist.h5')

### Model training with a custom training loop and tf.function - Graph Optimization

In [9]:
# Prepare the training dataset.
batch_size = 64

# Slice the in-memory arrays into (image, label) pairs, shuffle them with a
# 1024-element buffer, and group them into batches of `batch_size`.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

In [10]:
# Prepare the validation dataset.
# Reserve the last 10,000 training samples as a hold-out split. The remaining
# labels are bound to a new name instead of rebinding `y_train`: X_train keeps
# all of its rows, so truncating y_train would leave the two arrays with
# different lengths and would shrink y_train again on every re-run of this cell.
x_val = X_train[-10000:]
y_val = y_train[-10000:]
x_train = X_train[:-10000]
y_train_partial = y_train[:-10000]
# The validation batches are built from the test split, not from the hold-out
# slices above.
val_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
val_dataset = val_dataset.batch(64)

In [8]:
# Instantiate an optimizer to train the model: Adam with a learning rate of 1e-3.
# It is applied manually inside the custom train step rather than via compile().
optimizer = keras.optimizers.Adam(learning_rate=1e-3)

In [9]:
# Instantiate a loss function.
# from_logits=True because the model's final Dense layer has no activation.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [10]:
# Prepare the metrics.
# Separate accumulators for training and validation so each can be read and
# reset independently at the end of an epoch.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

In [11]:
# Model for the custom training loop. It has the same layer stack as the base
# model: channel-axis reshape, one 3x3 convolution with 12 filters, 2x2
# max-pooling, flatten, and a 10-unit Dense layer without activation.
model_2 = keras.Sequential(
    [
        layers.InputLayer(input_shape=(28, 28)),
        layers.Reshape((28, 28, 1)),
        layers.Conv2D(12, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

# Print the per-layer output shapes and parameter counts.
model_2.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 12)        120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 12)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2028)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                20290     
Total params: 20,410
Trainable params: 20,410
Non-trainable params: 0
_________________________________________________________________


In [12]:
@tf.function
def train_step(x, y):
    """Run one optimization step of `model_2` on a single batch.

    The forward pass and the loss are recorded on a gradient tape, the
    gradients with respect to the model's trainable weights are applied with
    the global `optimizer`, and `train_acc_metric` is updated with the batch.

    Args:
        x: batch of inputs passed to `model_2`.
        y: labels for the batch, passed to `loss_fn` and the metric.

    Returns:
        The loss value computed by `loss_fn` for this batch.
    """
    with tf.GradientTape() as grad_tape:
        batch_logits = model_2(x, training=True)
        batch_loss = loss_fn(y, batch_logits)
    weights = model_2.trainable_weights
    gradients = grad_tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))
    train_acc_metric.update_state(y, batch_logits)
    return batch_loss

In [13]:
@tf.function
def test_step(x, y):
    """Accumulate validation accuracy for one batch.

    Runs `model_2` with `training=False` on `x` and updates `val_acc_metric`
    with the labels `y` and the resulting logits. Nothing is returned.
    """
    val_acc_metric.update_state(y, model_2(x, training=False))

In [20]:
import time

# Custom training loop: for each epoch, run `train_step` over every training
# batch, report the accumulated training accuracy, then run `test_step` over
# the validation batches and report validation accuracy and elapsed time.
epochs = 10
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            # Use the same `batch_size` the training dataset was batched with,
            # so the sample count stays right if that setting is changed.
            print("Seen so far: %d samples" % ((step + 1) * batch_size))

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        test_step(x_batch_val, y_batch_val)

    # Read the validation accuracy before resetting the accumulator.
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))


Start of epoch 0


NameError: name 'train_step' is not defined

In [20]:
# Persist the custom-loop model to a .h5 file; the later conversion and
# evaluation cells load it back from this path.
model_2.save('2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5')

### Convert The above .h5 Grappler model (2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5) into TFLite (3_custom_model_with_TFfunction_Grappler_fashion_mnist.tflite)

In [21]:
import pathlib

In [22]:
def ConvertTFLite(model_path, filename):
  """Convert a saved Keras model to TFLite and write it under `tflite_models/`.

  Args:
    model_path: path of the saved Keras model to load.
    filename: name of the output file; `.tflite` is appended unless the name
      already ends with it.

  Returns:
    A status string. On success it is
    `'Converted to TFLite, path <file>'`. On any failure no exception is
    raised; the string starts with `'TFLite conversion failed'` and carries
    the exception type and message, so the two outcomes can be told apart.
  """
  try:
    # Loading Model
    model = tf.keras.models.load_model(model_path)
    # Converter
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()
    # Specify path; the output directory is created on demand.
    tflite_models_dir = pathlib.Path("tflite_models/")
    tflite_models_dir.mkdir(exist_ok=True, parents=True)
    # Avoid producing "name.tflite.tflite" when the caller already passed the
    # extension.
    if not filename.endswith(".tflite"):
      filename = filename + ".tflite"
    tflite_model_file = tflite_models_dir/filename
    # Save Model
    tflite_model_file.write_bytes(tflite_model)

    return f'Converted to TFLite, path {tflite_model_file}'
  except Exception as e:
    # Keep the "return a message instead of raising" contract, but make the
    # failure unmistakable (a bare str(e) can be empty or look like a path).
    return f'TFLite conversion failed ({type(e).__name__}): {e}'


In [23]:
# Convert the saved custom-loop model; the returned status string is the
# cell's displayed value.
ConvertTFLite('./2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5','3_custom_model_with_TFfunction_Grappler_fashion_mnist')



'Converted to TFLite, path tflite_models/3_custom_model_with_TFfunction_Grappler_fashion_mnist.tflite'

### Evaluate and find the model load and unit inference time for above .h5 Grappler model (2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5)

In [13]:
import time
# Reload Fashion-MNIST under the names used by the evaluation cells.
mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Normalize the images
train_images = train_images / 255.0
test_images = test_images / 255.0

# Time loading the saved model plus evaluating it on the whole test set.
start_time_test_set = time.time()

model = tf.keras.models.load_model('./2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5', compile = True)
# Compile explicitly so evaluate() has a loss and an accuracy metric to report.
model.compile(optimizer='adam',
           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
           metrics=['accuracy'])
score = model.evaluate(test_images, test_labels, verbose =0)
print('\n', 'Test accuracy on full test set:', score[1])
results1 = {'Time to load model and infer for testset': (time.time() - start_time_test_set) }

# Build a single-sample batch for the unit-inference timing.
data = test_images[0]
data = data.reshape((1, 28, 28))
# The sample comes from the test split, so its label must come from
# test_labels as well (train_labels[0] belongs to a different image).
data_y = test_labels[0:1]
# unit inference
start_time_infer = time.time()
score = model.evaluate(data, data_y, verbose=0)
results2 = {'Unit infer time': (time.time() - start_time_infer) }

print (results1)
print (results2)


 Test accuracy on full test set: 0.8959
{'Time to load model and infer for testset': 1.2047083377838135}
{'Unit infer time': 0.024936676025390625}


### Evaluate the .tflite version (3_custom_model_with_TFfunction_Grappler_fashion_mnist.tflite) of the .h5 Grappler model (2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5) and measure its model load and unit inference time

In [15]:
# Evaluation function for tflite models
def evaluate_model(interpreter, images=None, labels=None):
  """Run a TFLite interpreter over a set of images and measure its accuracy.

  Each image is fed to the interpreter one at a time. The predicted class is
  the argmax of the first output tensor and is compared with the label at the
  same position.

  Args:
    interpreter: a TFLite interpreter whose tensors are already allocated.
    images: iterable of single images (no batch axis). Defaults to the global
      `test_images`.
    labels: sequence of ground-truth class ids aligned with `images`.
      Defaults to the global `test_labels`.

  Returns:
    dict with 'time' (seconds spent in this function up to the accuracy
    computation) and 'accuracy' (fraction of correct predictions, 0.0 when
    there are no images).
  """
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels

  start_time = time.time()
  input_details = interpreter.get_input_details()[0]
  input_index = input_details["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Models converted with an integer inference_input_type expect quantized
  # input; the interpreter reports the dtype and the (scale, zero_point) pair.
  input_dtype = input_details.get("dtype", np.float32)
  input_scale, input_zero_point = input_details.get("quantization", (0.0, 0))
  quantize_input = bool(np.issubdtype(input_dtype, np.integer) and input_scale)
  if quantize_input:
    dtype_limits = np.iinfo(input_dtype)

  # Run predictions on every image in the dataset.
  prediction_digits = []
  for test_image in images:
    # Pre-processing: add batch dimension and convert to the model's input
    # data format (float32, or quantized integers when the model needs them).
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    if quantize_input:
      test_image = np.round(test_image / input_scale + input_zero_point)
      test_image = np.clip(test_image, dtype_limits.min, dtype_limits.max)
    interpreter.set_tensor(input_index, test_image.astype(input_dtype))

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the class with the
    # highest score. argmax is unaffected by output quantization because
    # dequantization is a monotonic mapping.
    output = interpreter.tensor(output_index)
    prediction_digits.append(int(np.argmax(output()[0])))

  # Compare prediction results with ground truth labels to calculate accuracy.
  total = len(prediction_digits)
  accurate_count = sum(
      1 for predicted, expected in zip(prediction_digits, labels)
      if predicted == expected)
  # Guard the empty case instead of dividing by zero.
  accuracy = accurate_count * 1.0 / total if total else 0.0
  results = {'time': (time.time() - start_time),
             'accuracy': accuracy}

  return results

In [37]:
import time
# Start the "load + single inference" timer before the interpreter is created.
# NOTE(review): the "qaware" in these timer names looks like a leftover; the
# model loaded here is the plain TFLite conversion.
start_time_qaware_full = time.time()
interpreter_custom_tflite_model = tf.lite.Interpreter('tflite_models/3_custom_model_with_TFfunction_Grappler_fashion_mnist.tflite')
interpreter_custom_tflite_model.allocate_tensors()
# One test image with a leading batch axis, cast to float32.
test_image = np.expand_dims(test_images[0], axis=0).astype(np.float32)

input_index = interpreter_custom_tflite_model.get_input_details()[0]["index"]
output_index = interpreter_custom_tflite_model.get_output_details()[0]["index"]

interpreter_custom_tflite_model.set_tensor(input_index, test_image)
# Time only the invoke() call for the unit-inference figure.
start_time_qaware_infer = time.time()
interpreter_custom_tflite_model.invoke()
results1 = {'Unit infer time': (time.time() - start_time_qaware_infer) }
# Read the output tensor back; it is not used further in this cell.
predictions = interpreter_custom_tflite_model.get_tensor(output_index)

results = {'Time to load model and infer': (time.time() - start_time_qaware_full)}
print (results)
print (results1)
# Run the interpreter through evaluate_model; the returned dict is the cell's
# displayed value.
evaluate_model(interpreter_custom_tflite_model)

{'Time to load model and infer': 0.0010547637939453125}
{'Unit infer time': 0.00041604042053222656}


{'time': 1.3081109523773193, 'accuracy': 0.8959}

### Convert the custom trained grappler model (2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5) to int-only quantized tflite model (4_custom_train_grappler_Integer_model.tflite)

In [28]:
# Reload the custom-loop model and convert it to an integer-only TFLite model.
model = tf.keras.models.load_model('./2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5', compile = True)
model.compile(optimizer='adam',
           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
           metrics=['accuracy'])
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Configure the converter fully before the single convert() call below; an
# earlier float conversion here produced a result that was never used.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
mnist_train, _ = tf.keras.datasets.fashion_mnist.load_data()
images = tf.cast(mnist_train[0], tf.float32) / 255.0
mnist_ds = tf.data.Dataset.from_tensor_slices((images)).batch(1)
def representative_data_gen():
  """Yield 100 single-image batches of normalized training images for calibration."""
  for input_value in mnist_ds.take(100):
    yield [input_value]

converter.representative_dataset = representative_data_gen
# Restrict the model to int8 builtin ops and make its input/output int8 too.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8
tflite_int_quant_model = converter.convert()
tflite_model_integeronly_file = '4_custom_train_grappler_Integer_model.tflite'

# The file is written to the working directory.
with open(tflite_model_integeronly_file, 'wb') as f:
  f.write(tflite_int_quant_model)

print('Saved custom trained grappler then integer quantized model to:', tflite_model_integeronly_file)



Saved custom trained grappler then integer quantized model to: 4_custom_train_grappler_Integer_model.tflite


In [19]:
### Evaluating custom_train_grappler_Integer_model.tflite
start_time_qaware_full = time.time()
# The conversion cell writes this model into the working directory (not into
# tflite_models/), so it is loaded from there.
interpreter_custom_tflite_model = tf.lite.Interpreter('./4_custom_train_grappler_Integer_model.tflite')
interpreter_custom_tflite_model.allocate_tensors()

input_details = interpreter_custom_tflite_model.get_input_details()[0]
input_index = input_details["index"]
output_index = interpreter_custom_tflite_model.get_output_details()[0]["index"]

# Build a one-image batch in the dtype the interpreter expects. A model
# converted with an integer inference_input_type rejects float32 tensors, so
# the normalized image is quantized with the reported (scale, zero_point).
test_image = np.expand_dims(test_images[0], axis=0).astype(np.float32)
input_scale, input_zero_point = input_details["quantization"]
if np.issubdtype(input_details["dtype"], np.integer) and input_scale:
    dtype_limits = np.iinfo(input_details["dtype"])
    test_image = np.round(test_image / input_scale + input_zero_point)
    test_image = np.clip(test_image, dtype_limits.min, dtype_limits.max)
test_image = test_image.astype(input_details["dtype"])

interpreter_custom_tflite_model.set_tensor(input_index, test_image)
# Time only the invoke() call for the unit-inference figure.
start_time_qaware_infer = time.time()
interpreter_custom_tflite_model.invoke()
results1 = {'Unit infer time': (time.time() - start_time_qaware_infer) }
predictions = interpreter_custom_tflite_model.get_tensor(output_index)

results = {'Time to load model and unit infer': (time.time() - start_time_qaware_full)}
print (results)
print (results1)
# NOTE: for an integer-input model, evaluate_model must also feed tensors in
# the interpreter's input dtype for this full-test-set run to succeed.
evaluate_model(interpreter_custom_tflite_model)


{'Time to load model and unit infer': 0.0010979175567626953}
{'Unit infer time': 0.00033473968505859375}


{'time': 1.6632821559906006, 'accuracy': 0.897}

### Convert the custom trained grappler model (2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5) to Int with float fallback quantized tflite model (5_custom_train_grappler_Int_float_model.tflite)

In [34]:
# Reload the custom-loop model and convert it to an integer-quantized TFLite
# model that keeps float input/output and may fall back to float ops.
model = tf.keras.models.load_model('./2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5', compile = True)
model.compile(optimizer='adam',
           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
           metrics=['accuracy'])
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Integer quantization with float fallback needs calibration data. With
# Optimize.DEFAULT alone the converter only performs dynamic range
# quantization. `representative_data_gen` is the generator defined in the
# integer-only conversion cell above.
converter.representative_dataset = representative_data_gen
# Convert once, after the converter is fully configured.
tflite_model_int_float_quant = converter.convert()
tflite_model_int_float_quant_file = '5_custom_train_grappler_Int_float_model.tflite'

# The file is written to the working directory.
with open(tflite_model_int_float_quant_file, 'wb') as f:
  f.write(tflite_model_int_float_quant)

print('Saved custom trained grappler then int with float quantized model to:', tflite_model_int_float_quant_file)



Saved custom trained grappler then int with float quantized model to: 5_custom_train_grappler_Int_float_model.tflite


In [18]:
# Evaluate custom_train_grappler_Int_float_model.tflite

# Start the "load + single inference" timer before the interpreter is created.
start_time_qaware_full = time.time()
interpreter_custom_tflite_model = tf.lite.Interpreter('./5_custom_train_grappler_Int_float_model.tflite')
interpreter_custom_tflite_model.allocate_tensors()
# One test image with a leading batch axis, cast to float32.
test_image = np.expand_dims(test_images[0], axis=0).astype(np.float32)

input_index = interpreter_custom_tflite_model.get_input_details()[0]["index"]
output_index = interpreter_custom_tflite_model.get_output_details()[0]["index"]

interpreter_custom_tflite_model.set_tensor(input_index, test_image)
# Time only the invoke() call for the unit-inference figure.
start_time_qaware_infer = time.time()
interpreter_custom_tflite_model.invoke()
results1 = {'Unit infer time': (time.time() - start_time_qaware_infer) }
# Read the output tensor back; it is not used further in this cell.
predictions = interpreter_custom_tflite_model.get_tensor(output_index)

results = {'Time to load model and unit infer': (time.time() - start_time_qaware_full)}
print (results)
print (results1)
# Run the interpreter through evaluate_model; the returned dict is the cell's
# displayed value.
evaluate_model(interpreter_custom_tflite_model)

{'Time to load model and infer': 0.0008296966552734375}
{'Time to only infer': 0.00027060508728027344}


{'time': 1.1398780345916748, 'accuracy': 0.8959}

### Convert the custom trained grappler model (2_custom_model_with_TFfunction_Grappler_fashion_mnist) to Float16 tflite model (6_custom_trained_grappler_float16_model.tflite)

In [33]:
# Reload the custom-loop model and convert it to a float16-quantized TFLite model.
model = tf.keras.models.load_model('./2_custom_model_with_TFfunction_Grappler_fashion_mnist.h5', compile = True)
model.compile(optimizer='adam',
           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
           metrics=['accuracy'])
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Default optimizations plus float16 as the supported target type.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_fp16_model = converter.convert()
tflite_model_fp16_file = '6_custom_trained_grappler_float16_model.tflite'

# The file is written to the working directory.
with open(tflite_model_fp16_file, 'wb') as f:
  f.write(tflite_fp16_model)

print('Saved custom trained grappler then float16 quantized model to:', tflite_model_fp16_file)


Saved custom trained grappler then float16 quantized model to: 6_custom_trained_grappler_float16_model.tflite


In [2]:
import time 

In [35]:
# Evaluating custom_trained_grappler_float16_model.tflite


# Start the "load + single inference" timer before the interpreter is created.
start_time_qaware_full = time.time()
interpreter_custom_tflite_model = tf.lite.Interpreter('./6_custom_trained_grappler_float16_model.tflite')
interpreter_custom_tflite_model.allocate_tensors()
# One test image with a leading batch axis, cast to float32.
test_image = np.expand_dims(test_images[0], axis=0).astype(np.float32)

input_index = interpreter_custom_tflite_model.get_input_details()[0]["index"]
output_index = interpreter_custom_tflite_model.get_output_details()[0]["index"]

interpreter_custom_tflite_model.set_tensor(input_index, test_image)
# Time only the invoke() call for the unit-inference figure.
start_time_qaware_infer = time.time()
interpreter_custom_tflite_model.invoke()
results1 = {'Unit infer time': (time.time() - start_time_qaware_infer) }
# Read the output tensor back; it is not used further in this cell.
predictions = interpreter_custom_tflite_model.get_tensor(output_index)

results = {'Time to load model and unit infer': (time.time() - start_time_qaware_full)}
print (results)
print (results1)
# Run the interpreter through evaluate_model; the returned dict is the cell's
# displayed value.
evaluate_model(interpreter_custom_tflite_model)

{'Time to load model and infer': 0.0012693405151367188}
{'Time to only infer': 0.0003323554992675781}


{'time': 1.1033220291137695, 'accuracy': 0.8959}