In [37]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm

In [38]:
# Train LetNet-5 on MNIST dataset

# Load MNIST dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize the data
x_train = x_train #/ 255.0
x_test = x_test #/ 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# Shuffle the data
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Create the model

# Define the LeNet-5 model
lenet_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=16, kernel_size=(5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=120, activation='relu'),
    tf.keras.layers.Dense(units=84, activation='relu'),
    tf.keras.layers.Dense(units=10, activation='softmax')
])

# Compile the model
lenet_model.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                    metrics=['accuracy'])

# Train the model
lenet_model.fit(train_ds, epochs=10)

# Evaluate the model
lenet_model.evaluate(test_ds)

# Save the model
lenet_model.save('lenet_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
input_data = np.asarray(x_train[::10], dtype=np.float32)

In [40]:
# Convert to TFLite (float32)
converter = tf.lite.TFLiteConverter.from_keras_model(lenet_model)
tflite_model_float32 = converter.convert()

# Save the float32 model
with open('model_float32.tflite', 'wb') as f:
    f.write(tflite_model_float32)

# Convert to TFLite (int8)
def representative_dataset_gen():
    for _ in range(100):  # Adjust the range according to your dataset size
        # Provide data samples from your dataset
        yield [input_data]  # input_data should match the input shape of your model

converter = tf.lite.TFLiteConverter.from_keras_model(lenet_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_int8 = converter.convert()

# Save the int8 model
with open('model_int8.tflite', 'wb') as f:
    f.write(tflite_model_int8)

INFO:tensorflow:Assets written to: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp3a61zpgf/assets


INFO:tensorflow:Assets written to: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp3a61zpgf/assets
2024-01-17 02:43:51.669698: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:363] Ignored output_format.
2024-01-17 02:43:51.669714: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:366] Ignored drop_control_dependency.
2024-01-17 02:43:51.669836: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp3a61zpgf
2024-01-17 02:43:51.671707: I tensorflow/cc/saved_model/reader.cc:107] Reading meta graph with tags { serve }
2024-01-17 02:43:51.671719: I tensorflow/cc/saved_model/reader.cc:148] Reading SavedModel debug info (if present) from: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp3a61zpgf
2024-01-17 02:43:51.681101: I tensorflow/cc/saved_model/loader.cc:210] Restoring SavedModel bundle.
2024-01-17 02:43:51.735203: I tensorflow/cc/saved_model/loader.cc:194] Running initializatio

INFO:tensorflow:Assets written to: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp0ojad0wp/assets


INFO:tensorflow:Assets written to: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp0ojad0wp/assets
2024-01-17 02:43:54.926079: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:363] Ignored output_format.
2024-01-17 02:43:54.926097: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:366] Ignored drop_control_dependency.
2024-01-17 02:43:54.926222: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp0ojad0wp
2024-01-17 02:43:54.928019: I tensorflow/cc/saved_model/reader.cc:107] Reading meta graph with tags { serve }
2024-01-17 02:43:54.928034: I tensorflow/cc/saved_model/reader.cc:148] Reading SavedModel debug info (if present) from: /var/folders/j_/1kx7__wx15d4bvtts4f0h18r0000gq/T/tmp0ojad0wp
2024-01-17 02:43:54.937417: I tensorflow/cc/saved_model/loader.cc:210] Restoring SavedModel bundle.
2024-01-17 02:43:54.990329: I tensorflow/cc/saved_model/loader.cc:194] Running initializatio

In [44]:
import numpy as np
import tensorflow as tf

# Function to load a TFLite model
def load_tflite_model(path):
    with open(path, 'rb') as f:
        model_content = f.read()
    interpreter = tf.lite.Interpreter(model_content=model_content)
    interpreter.allocate_tensors()
    return interpreter

# Function to run inference
def run_inference(interpreter, input_data):
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    
    # Check if input type is quantized, then transform data to uint8
    if input_details[0]['dtype'] == np.uint8:
        input_scale, input_zero_point = input_details[0]["quantization"]
        input_data = input_data / input_scale + input_zero_point

    input_data = np.array(input_data, dtype=input_details[0]['dtype'])
    interpreter.set_tensor(input_details[0]['index'], input_data)

    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    return output_data

# Load the float32 and int8 models
float32_interpreter = load_tflite_model('model_float32.tflite')
int8_interpreter = load_tflite_model('model_int8.tflite')

# Prepare your input data (modify this according to your needs)
# Example: input_data = np.array([your_input_data])
input_data = x_test  # Example input. Replace with real data.

# Run inference
pred_32 = []
pred_8 = []
for t in tqdm(input_data):
    x = np.asarray([t])
    float32_results = run_inference(float32_interpreter, x)
    int8_results = run_inference(int8_interpreter, x)
    pred_32.append(float32_results)
    pred_8.append(int8_results)


# Process the results as needed
# Example: print(float32_results), print(int8_results)
print("Float32 Model Results:", float32_results)
print("Int8 Model Results:", int8_results)

  0%|          | 0/10000 [00:00<?, ?it/s]

Float32 Model Results: [[8.3245204e-15 8.9493327e-13 5.3035524e-15 1.3010989e-17 3.8570120e-09
  1.7525302e-10 1.0000000e+00 5.0880333e-24 8.3130777e-12 1.7814556e-15]]
Int8 Model Results: [[  0   0   0   0   0   0 255   0   0   0]]


In [45]:
y_32 = np.asarray(pred_32).argmax(axis=2)[:,0]
y_8 = np.asarray(pred_8).argmax(axis=2)[:,0]

In [46]:
from sklearn.metrics import balanced_accuracy_score

In [47]:
print("Float32:", balanced_accuracy_score(y_test, y_32))
print("Int8:", balanced_accuracy_score(y_test, y_8))

Float32: 0.9822541720826299
Int8: 0.9828215834116726
