In [18]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

In [19]:
# Dataset / model configuration
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST, already partitioned into training and test splits
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert to float32, rescale by 1/255 and append a trailing channel axis so
# every image has shape (28, 28, 1)
x_train = np.expand_dims(x_train.astype("float32") / 255, axis=-1)
x_test = np.expand_dims(x_test.astype("float32") / 255, axis=-1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# Turn the integer class vectors into binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [20]:
# Two Conv2D + MaxPooling2D stages that differ only in their filter count
conv_stack = []
for filters in (32, 64):
    conv_stack += [
        layers.Conv2D(filters, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]

# Input -> conv stack -> flatten -> dropout -> softmax over num_classes
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        *conv_stack,
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 13, 13, 32)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 1600)              0         
                                                                 
 dropout_1 (Dropout)         (None, 1600)             

In [21]:
# Training hyperparameters
batch_size = 128
epochs = 15

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Reserve 10% of the training data for validation while fitting
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fa668698eb0>

In [22]:
# Evaluate silently on the test split; score[0] is the loss and score[1] the
# accuracy metric configured in compile()
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)


Test loss: 0.026330914348363876
Test accuracy: 0.9905999898910522


In [23]:
import os
import h5py
import matplotlib.pyplot as plt
from sys import getsizeof

In [24]:
# File name under which the trained Keras model is saved
keras_model_name = 'tf_model_mnist.h5'

In [25]:
# Persist the trained model to disk so its file size can be measured
model.save(keras_model_name)

In [26]:
def get_file_size(file_path):
    """Return the size, in bytes, of the file located at ``file_path``."""
    return os.path.getsize(file_path)

In [27]:
def convert_bytes(size, unit=None):
    """Print ``size`` (a byte count) as a human-readable file size.

    Args:
        size: Number of bytes.
        unit: ``"KB"`` to report kilobytes, ``"MB"`` to report megabytes
            (both rounded to three decimals). Any other value reports the
            raw byte count.

    Returns:
        None. The message is written to stdout.
    """
    if unit == "KB":
        amount, label = round(size / 1024, 3), ' Kilobytes'
    elif unit == "MB":
        amount, label = round(size / (1024 * 1024), 3), ' Megabytes'
    else:
        amount, label = size, ' bytes'
    print('File size: ' + str(amount) + label)

In [28]:
# Report the on-disk size of the saved Keras model in megabytes
convert_bytes(get_file_size(keras_model_name), "MB")

File size: 0.434 Megabytes


In [29]:
# Keep the Keras model's size in bytes as the baseline for the size ratios
# computed in later cells
keras_model_size = get_file_size(keras_model_name)

In [30]:
# Re-evaluate on the test split with verbose=2 (prints one summary line);
# score[0] is the loss and score[1] the accuracy metric
score = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

313/313 - 0s - loss: 0.0263 - accuracy: 0.9906 - 360ms/epoch - 1ms/step
Test loss: 0.026330914348363876
Test accuracy: 0.9905999898910522


In [34]:
# File name for the TFLite model converted without any optimizations
tf_lite_model_filename = "tf_lite_model.tflite"

In [35]:
import tensorflow as tf


# Build a TFLite converter from the in-memory Keras model and convert it.
# No optimizations are set on the converter in this cell.
tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = tf_lite_converter.convert()



INFO:tensorflow:Assets written to: /tmp/tmp2_521sdc/assets


INFO:tensorflow:Assets written to: /tmp/tmp2_521sdc/assets
2022-11-29 22:52:19.988054: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-11-29 22:52:19.988081: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-11-29 22:52:19.988171: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmp2_521sdc
2022-11-29 22:52:19.988916: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2022-11-29 22:52:19.988929: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmp2_521sdc
2022-11-29 22:52:19.991220: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2022-11-29 22:52:20.013862: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmp2_521sdc
2022-11-29 22:52:20.019511: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

In [36]:
tflite_model_name = tf_lite_model_filename
# Write through a context manager so the file handle is closed (and the data
# flushed) deterministically before later cells measure the file's size.
with open(tflite_model_name, "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
bytes_written  # number of bytes written, shown as the cell output as before

142352

In [38]:
# Report the on-disk size of the unoptimized TFLite model in kilobytes
convert_bytes(get_file_size(tf_lite_model_filename), "KB")

File size: 139.016 Kilobytes


In [39]:
# Size in bytes of the unoptimized TFLite model, used in later size ratios
tflite_file_size = get_file_size(tf_lite_model_filename)

In [46]:
# Load the unoptimized TFLite model and show what its first input and first
# output tensors look like
interpreter = tf.lite.Interpreter(model_path=tf_lite_model_filename)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

first_input, first_output = input_details[0], output_details[0]
print("Input Shape:", first_input['shape'])
print("Input Type:", first_input['dtype'])
print("Output Shape:", first_output['shape'])
print("Output Type:", first_output['dtype'])

Input Shape: [ 1 28 28  1]
Input Type: <class 'numpy.float32'>
Output Shape: [ 1 10]
Output Type: <class 'numpy.float32'>


In [48]:
# Derive the batch shape from the test set and the class count from the
# model's reported output shape instead of hard-coding 10000 and 10, so the
# cell keeps working if the test set or the model head changes.
num_test_samples = x_test.shape[0]
num_outputs = int(output_details[0]['shape'][-1])

interpreter.resize_tensor_input(input_details[0]['index'], x_test.shape)
# NOTE(review): explicitly resizing the output tensor is likely redundant once
# the input has been resized and allocate_tensors() runs; kept to preserve
# the existing behaviour. Confirm and drop.
interpreter.resize_tensor_input(output_details[0]['index'], (num_test_samples, num_outputs))
interpreter.allocate_tensors()

# Re-query the tensor details so they reflect the resized shapes
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input Shape:", input_details[0]['shape'])
print("Input Type:", input_details[0]['dtype'])
print("Output Shape:", output_details[0]['shape'])
print("Output Type:", output_details[0]['dtype'])

Input Shape: [10000    28    28     1]
Input Type: <class 'numpy.float32'>
Output Shape: [10000    10]
Output Type: <class 'numpy.float32'>


In [49]:
# Inspect the dtype of the test images before feeding them to the interpreter
x_test.dtype

dtype('float32')

In [50]:
# np.asarray returns x_test itself when it is already a float32 ndarray (it is
# cast to float32 when loaded), avoiding the full copy np.array always makes.
test_imgs_numpy = np.asarray(x_test, dtype=np.float32)

In [51]:
# Confirm the array passed to the interpreter is float32
test_imgs_numpy.dtype

dtype('float32')

In [52]:
# Feed the whole test set through the TFLite interpreter in one invocation
input_index = input_details[0]['index']
output_index = output_details[0]['index']

interpreter.set_tensor(input_index, test_imgs_numpy)
interpreter.invoke()
tflite_model_predictions = interpreter.get_tensor(output_index)
print("Prediction results shape:", tflite_model_predictions.shape)

# Predicted class = index of the largest score in each row
prediction_classes = tflite_model_predictions.argmax(axis=1)

Prediction results shape: (10000, 10)


In [58]:
# Recover integer class labels from the binary class matrix built by
# to_categorical, then preview the first five
y_classes = y_test.argmax(axis=1)
print(y_classes[:5])

[7 2 1 0 4]


In [57]:
# Preview the first five predicted classes for comparison with y_classes
print(prediction_classes[:5])

[7 2 1 0 4]


In [59]:
# accuracy_score is never imported in this notebook, so calling it fails on a
# fresh kernel. Accuracy is the fraction of positions where the predicted and
# true labels agree, which NumPy (already imported) computes directly.
acc = float(np.mean(prediction_classes == y_classes))

In [60]:
# Report the TFLite model's test accuracy as a percentage (two decimals)
print('Test accuracy TFLITE model is {}%'.format(round(100*acc, 2)))

Test accuracy TFLITE model is 99.06%


In [61]:
# Size of the unoptimized TFLite file relative to the saved Keras model file
tflite_file_size/keras_model_size

0.3126526452655808

In [70]:
# File name for the TFLite model converted with float16 as the supported type
TF_LITE_MODEL_FLOAT_16_FILE_NAME = "tf_lite_float_16_model.tflite"

In [71]:
# Rebuild the converter from the same Keras model, enable the default
# optimization set, and restrict the target's supported types to float16
# before converting.
tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
tf_lite_converter.optimizations = [tf.lite.Optimize.DEFAULT]
tf_lite_converter.target_spec.supported_types = [tf.float16]
tflite_model = tf_lite_converter.convert()



INFO:tensorflow:Assets written to: /tmp/tmpz0lkobw7/assets


INFO:tensorflow:Assets written to: /tmp/tmpz0lkobw7/assets
2022-11-29 23:12:30.761973: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-11-29 23:12:30.761993: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-11-29 23:12:30.762090: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpz0lkobw7
2022-11-29 23:12:30.762750: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2022-11-29 23:12:30.762762: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpz0lkobw7
2022-11-29 23:12:30.764920: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2022-11-29 23:12:30.786945: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpz0lkobw7
2022-11-29 23:12:30.792388: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

In [72]:
tflite_model_name = TF_LITE_MODEL_FLOAT_16_FILE_NAME
# Write through a context manager so the file handle is closed (and the data
# flushed) deterministically before later cells measure the file's size.
with open(tflite_model_name, "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
bytes_written  # number of bytes written, shown as the cell output as before

73612

In [73]:
# Report the on-disk size of the float16 TFLite model in kilobytes
convert_bytes(get_file_size(TF_LITE_MODEL_FLOAT_16_FILE_NAME), "KB")

File size: 71.887 Kilobytes


In [74]:
# Size in bytes of the float16 TFLite model, used in the ratios that follow
tflite_float_16_file_size = get_file_size(TF_LITE_MODEL_FLOAT_16_FILE_NAME)

In [75]:
# Size of the float16 TFLite file relative to the saved Keras model file
tflite_float_16_file_size/keras_model_size

0.1616765940997663

In [76]:
# Size of the float16 TFLite file relative to the unoptimized TFLite file
tflite_float_16_file_size/tflite_file_size

0.5171125098347757

In [77]:
# File name for the TFLite model converted with size-oriented optimization
TF_LITE_SIZE_QUANT_MODEL_FILE_NAME = "tf_lite_quant_model.tflite"

In [78]:
# Rebuild the converter from the same Keras model with optimizations enabled.
tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Optimize.OPTIMIZE_FOR_SIZE is deprecated and documented as doing the same as
# Optimize.DEFAULT, so use the supported name; the converted model is unchanged.
tf_lite_converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = tf_lite_converter.convert()



INFO:tensorflow:Assets written to: /tmp/tmpows26a9w/assets


INFO:tensorflow:Assets written to: /tmp/tmpows26a9w/assets
2022-11-29 23:12:53.445349: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-11-29 23:12:53.445369: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-11-29 23:12:53.445463: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpows26a9w
2022-11-29 23:12:53.446237: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2022-11-29 23:12:53.446249: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpows26a9w
2022-11-29 23:12:53.449106: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2022-11-29 23:12:53.472814: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpows26a9w
2022-11-29 23:12:53.478388: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

In [79]:
tflite_model_name = TF_LITE_SIZE_QUANT_MODEL_FILE_NAME
# Write through a context manager so the file handle is closed (and the data
# flushed) deterministically before later cells measure the file's size.
with open(tflite_model_name, "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
bytes_written  # number of bytes written, shown as the cell output as before

39936

In [80]:
# Report the on-disk size of the size-optimized TFLite model in kilobytes
convert_bytes(get_file_size(TF_LITE_SIZE_QUANT_MODEL_FILE_NAME), "KB")

File size: 39.0 Kilobytes


In [81]:
# Size in bytes of the size-optimized TFLite model, used in the ratios below
tflite_float_quant_file_size = get_file_size(TF_LITE_SIZE_QUANT_MODEL_FILE_NAME)

In [82]:
# Size of the size-optimized TFLite file relative to the saved Keras model file
tflite_float_quant_file_size/keras_model_size

0.0877128248379105

In [83]:
# Size of the size-optimized TFLite file relative to the float16 TFLite file
tflite_float_quant_file_size/ tflite_float_16_file_size

0.5425202412650112

In [84]:
# Load the size-optimized TFLite model and show what its first input and
# first output tensors look like
interpreter = tf.lite.Interpreter(model_path=TF_LITE_SIZE_QUANT_MODEL_FILE_NAME)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

first_input, first_output = input_details[0], output_details[0]
print("Input Shape:", first_input['shape'])
print("Input Type:", first_input['dtype'])
print("Output Shape:", first_output['shape'])
print("Output Type:", first_output['dtype'])

Input Shape: [ 1 28 28  1]
Input Type: <class 'numpy.float32'>
Output Shape: [ 1 10]
Output Type: <class 'numpy.float32'>


In [85]:
# Derive the batch shape from the test set and the class count from the
# model's reported output shape instead of hard-coding 10000 and 10, so the
# cell keeps working if the test set or the model head changes.
num_test_samples = x_test.shape[0]
num_outputs = int(output_details[0]['shape'][-1])

interpreter.resize_tensor_input(input_details[0]['index'], x_test.shape)
# NOTE(review): explicitly resizing the output tensor is likely redundant once
# the input has been resized and allocate_tensors() runs; kept to preserve
# the existing behaviour. Confirm and drop.
interpreter.resize_tensor_input(output_details[0]['index'], (num_test_samples, num_outputs))
interpreter.allocate_tensors()

# Re-query the tensor details so they reflect the resized shapes
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input Shape:", input_details[0]['shape'])
print("Input Type:", input_details[0]['dtype'])
print("Output Shape:", output_details[0]['shape'])
print("Output Type:", output_details[0]['dtype'])

Input Shape: [10000    28    28     1]
Input Type: <class 'numpy.float32'>
Output Shape: [10000    10]
Output Type: <class 'numpy.float32'>


In [86]:
# Inspect the dtype of the test images before feeding them to the interpreter
x_test.dtype

dtype('float32')

In [87]:
# np.asarray returns x_test itself when it is already a float32 ndarray (it is
# cast to float32 when loaded), avoiding the full copy np.array always makes.
test_imgs_numpy = np.asarray(x_test, dtype=np.float32)

In [88]:
# Feed the whole test set through the size-optimized interpreter in one
# invocation
input_index = input_details[0]['index']
output_index = output_details[0]['index']

interpreter.set_tensor(input_index, test_imgs_numpy)
interpreter.invoke()
tflite_model_predictions = interpreter.get_tensor(output_index)
print("Prediction results shape:", tflite_model_predictions.shape)

# Predicted class = index of the largest score in each row
prediction_classes = tflite_model_predictions.argmax(axis=1)

Prediction results shape: (10000, 10)


In [89]:
# accuracy_score is never imported in this notebook, so calling it fails on a
# fresh kernel. Accuracy is the fraction of positions where the predicted and
# true labels agree, which NumPy (already imported) computes directly.
acc = float(np.mean(prediction_classes == y_classes))

In [90]:
# Report the size-optimized TFLite model's test accuracy as a percentage
# (two decimals)
print('Test accuracy TFLITE Quantized model is {}%'.format(round(100*acc, 2)))

Test accuracy TFLITE Quantized model is 99.06%
