Necessary imports

In [1]:
!pip install tensorflow-model-optimization

Collecting tensorflow-model-optimization
  Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl.metadata (904 bytes)
Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-model-optimization
Successfully installed tensorflow-model-optimization-0.8.0


In [2]:
import os
import tensorflow as tf
import tensorflow_model_optimization as tfmot

In [3]:
# function to create lenet model
def build_lenet5_mnist():


    model = tf.keras.Sequential([

        # 4 layers -> 2 convolutional and 2 pooling
        tf.keras.layers.Conv2D(6, kernel_size = 5, strides = 1, padding = 'same', activation = 'relu', input_shape = (28, 28, 1)),
        tf.keras.layers.MaxPooling2D(pool_size = 2, strides = 2),

        tf.keras.layers.Conv2D(16, kernel_size = 5, strides = 1, activation = 'relu'),
        tf.keras.layers.MaxPooling2D(pool_size = 2, strides = 2),

        # 2D -> 1D
        tf.keras.layers.Flatten(),

        # 3 fully connected layers
        tf.keras.layers.Dense(120, activation = 'relu'),
        tf.keras.layers.Dense(84, activation = 'relu'),
        tf.keras.layers.Dense(10, activation = 'softmax')
    ])

    return model

In [4]:
# Define a pruning schedule
def build_lenet5_mnist_with_pruning():
    # Define pruning schedule (starts pruning at 0% and increases to 50% pruning over 10 epochs)
    pruning_schedule = tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0,  # Start with no pruning
        final_sparsity=0.5,    # End with 50% sparsity
        begin_step=0,          # Start pruning from the beginning
        end_step=2000          # End pruning after 2000 steps
    )

    # Build model with pruning
    model = tf.keras.Sequential([
        tfmot.sparsity.keras.prune_low_magnitude(  # Apply pruning to this layer
            tf.keras.layers.Conv2D(6, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(28, 28, 1)),
            pruning_schedule
        ),
        tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),

        tfmot.sparsity.keras.prune_low_magnitude(  # Apply pruning to this layer
            tf.keras.layers.Conv2D(16, kernel_size=5, strides=1, activation='relu'),
            pruning_schedule
        ),
        tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),

        # Flatten layer (no pruning here)
        tf.keras.layers.Flatten(),

        tfmot.sparsity.keras.prune_low_magnitude(  # Apply pruning to this layer
            tf.keras.layers.Dense(120, activation='relu'),
            pruning_schedule
        ),
        tfmot.sparsity.keras.prune_low_magnitude(  # Apply pruning to this layer
            tf.keras.layers.Dense(84, activation='relu'),
            pruning_schedule
        ),
        tf.keras.layers.Dense(10, activation='softmax')  # No pruning here
    ])

    return model


In [5]:
# function to load MNIST dataset and do some preprocessing
def load_data(validation_split = 0.25):
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Normalization can reduce training time significantly and (usually) increases the model's accuracy
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

    # expand dimensions from (28, 28) to (28, 28, 1) to match the input format of the first convolutional layer of the model
    x_train = x_train[..., tf.newaxis]
    x_test = x_test[..., tf.newaxis]

    # Make sure that your labels are in numerical form
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)

    # A subset of the dataset can be used in order to monitor how our network behaves during training
    # (validation dataset) and helps us avoid overfitting the model to the training dataset.
    # The network's parameters are not updated when examining this subset of data

    # create validation dataset
    if validation_split is not None:
        num_validation_samples = int(validation_split * x_train.shape[0])
        x_train, x_val = x_train[:-num_validation_samples], x_train[-num_validation_samples:]
        y_train, y_val = y_train[:-num_validation_samples], y_train[-num_validation_samples:]
        return (x_train, y_train), (x_val, y_val), (x_test, y_test)

    else:
        return (x_train, y_train), (x_test, y_test), (x_test, y_test)

In [6]:
# function to train the model
def train_model(model, X_train, y_train, X_val, y_val, epochs = 25, learning_rate = 0.001,
                patience = 5, batch_size = 32):

    # You can experiment with different optimizers and learning rates (no need to focus on that though !!!)
    model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate, momentum = 0.9),
                  loss = 'categorical_crossentropy', metrics = ['accuracy'])

    # Apply early stopping to speed up training and avoid overfitting (very helpful with smaller datasets)
    # In this instance, if the validation loss does not drop over 0.001 for <patience> number of epochs,
    # the training stops.

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = patience,
                                   min_delta = 0.001, restore_best_weights = True)

    # Train the model
    model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size,
              validation_data = (X_val, y_val),
              callbacks = [early_stopping])

    return model

In [7]:
def train_prune_model(model, X_train, y_train, X_val, y_val, epochs = 25, learning_rate = 0.001, patience = 5, batch_size = 32):
  # Compile model with pruning
  model = build_lenet5_mnist_with_pruning()
  model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  # Model summary to view the layers with pruning applied
  model.summary()

  # Training model
  # You should use a callback to ensure the pruning process is applied during training
  pruning_callbacks = [
      tfmot.sparsity.keras.UpdatePruningStep()  # Callback to update pruning during training
  ]

  # Train model (pruning will be applied during training)
  model.fit(X_train, y_train, epochs=epochs, batch_size = batch_size, validation_data=(X_val, y_val), callbacks=pruning_callbacks)

In [8]:
# function to evaluate the accuracy of the trained model
def evaluate_model(model, X_test, y_test):
    _, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
    return test_accuracy * 100

In [9]:
# function to provide a small dataset sample for integer quantization
def representative_dataset():
    """"
    Representative dataset for integer quantization (calibration data to scale the
    weights and inputs to the integer domain)
    """
    for i in range(100):
        yield [X_train[i:i+1].astype('float32')]

In [10]:
# function to cnvert the model to TFLite format (to use in our device)
def convert_to_tflite(model, filename = "model.tflite"):
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()
    with open(filename, "wb") as f:
        f.write(tflite_model)
    print(f"Model converted to TFLite and saved as {filename}")

    model_size = os.path.getsize(filename) / 1024  # Size in KB
    print(f"TFLite Model Size: {model_size:.2f} KB")

    return filename


In [11]:
# function to convert the model to a quantized TFLite version (reduced model size and better efficiency)
def convert_to_quantized_tflite(model, filename = "model.tflite"):
    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8
    converter.inference_output_type = tf.uint8

    tflite_model = converter.convert()

    with open(filename, "wb") as f:
        f.write(tflite_model)

    print(f"Quantized model converted to TFLite and saved as {filename}")

    # Calculate and print the size of the TFLite model
    model_size = os.path.getsize(filename) / 1024  # Size in KB
    print(f"Quantized TFLite Model Size: {model_size:.2f} KB")

    return filename

In [12]:
# function to perform inference for a tflite model
def tflite_inference(tflite_model_path, X_test, y_test):
    """
    Perform inference using a TFLite model. This function can be used
    in order to evaluate the performance of your models after applying your
    optimization techniques (i.e. Quantization, pruning etc).
    """

    interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # In case we quantized our model to Full integer format, we need to be careful and
    # check if the input type is UINT8 --> if so, we must normalize data to the
    # quantized range [1]

    input_dtype = input_details[0]['dtype']
    print(input_dtype)

    # [1]
    if input_dtype == tf.uint8:
        scale, zero_point = input_details[0]['quantization']
        X_test = (X_test / scale + zero_point).astype(input_dtype)

    # iterate over predictions and store results
    predictions = []
    for i in range(X_test.shape[0]):
        input_data = X_test[i:i+1]
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predictions.append(output_data)

    # iterate over predictions to find correct ones
    correct = 0
    for i, prediction in enumerate(predictions):
        predicted_label = tf.argmax(prediction[0]).numpy()
        true_label = tf.argmax(y_test[i]).numpy()
        if predicted_label == true_label:
            correct += 1

    # print the accuracy of our model
    accuracy = correct / len(X_test) * 100
    print(f"TFLite Model Accuracy: {accuracy:.2f}%")
    return accuracy

In [13]:
# Specify device (CPU or GPU)
device_name = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0"
print(f"Training on: {device_name}")

with tf.device(device_name):

    # lenet model
    model = build_lenet5_mnist()
    model.summary()

    epochs = [1, 1]
    learning_rate = [0.001, 0.0005]
    BATCH_SZ = 32
    patience = 3

    # load mnist
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = load_data()

    # perform training
    for i, (e, lr) in enumerate(zip(epochs, learning_rate)):
        print(f"\nStarting training iteration {i + 1} with {e} epochs and learning rate {lr}")
        model = train_model(model, X_train, y_train, X_val, y_val, e, lr, patience, BATCH_SZ)

    print("\n\nFinal Evaluation on Test Data:")
    initial_model_accuracy = evaluate_model(model, X_test, y_test)

    ########## Let's convert the model to TFLITE FORMAT ###########
    tflite_model_path = convert_to_tflite(model)

    # Without applying any optimizations to our model, the accuracy should remain the same ...
    print("\n\nPerforming inference with TFLite model...")
    tflite_no_opt_accuracy = tflite_inference(tflite_model_path, X_test, y_test)

    ##### QUANTIZATION #####

    integer_tflite_path = convert_to_quantized_tflite(model, filename = "quantized_model.tflite")

    print(f"\nInference with Integer quantization...")
    tflite_int_quant_acc = tflite_inference(integer_tflite_path, X_test, y_test)

Training on: /CPU:0
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 6)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 5, 16)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 400)               0         
                                                                 
 dense (Dense)               (None, 



Quantized model converted to TFLite and saved as quantized_model.tflite
Quantized TFLite Model Size: 71.05 KB

Inference with Integer quantization...
<class 'numpy.uint8'>
TFLite Model Accuracy: 96.11%


In [14]:
def load_mnist(i = 0):
    # Load MNIST dataset
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

    # Normalize the images to be between 0 and 1
    # comment below for the quantized
    # train_images = train_images.astype('float32') / 255.0
    # test_images = test_images.astype('float32') / 255.0

    # Reshape the images to add the channel dimension (28, 28, 1)
    train_images = np.expand_dims(train_images, axis=-1)  # (28, 28, 1)
    test_images = np.expand_dims(test_images, axis=-1)  # (28, 28, 1)

    # Return a single image with the batch dimension (1, 28, 28, 1)
    # Adding the batch dimension for a single image from the test set
    return np.expand_dims(test_images[i], axis=0), test_labels[i]  # Shape will be (1, 28, 28, 1)

In [15]:
import tensorflow as tf
import numpy as np

In [16]:
#QUANTIZED gia ena input
import time
import numpy as np
import tensorflow as tf

# Load the TFLite model
tflite_model_path = 'quantized_model.tflite'  # Replace with your model's path
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)

# Allocate tensors (this will initialize the interpreter and load the model)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get the shape of the input tensor
input_shape = input_details[0]['shape']
input_dtype = input_details[0]['dtype']

# Load test data (assuming load_mnist function exists)
test_images, test_labels = load_mnist(6)

# Choose an image from the test set
image = test_images  # First image (28x28)
label = test_labels  # Ground truth label

# Ensure the image has the correct dtype (uint8 for quantized models)
image = image.astype(input_dtype)

# Set the input tensor
interpreter.set_tensor(input_details[0]['index'], image)

# Measure inference time
start_time = time.time()  # Start timer
interpreter.invoke()       # Run inference
end_time = time.time()     # End timer

# Compute elapsed time
inference_time = (end_time - start_time) * 1000  # Convert to milliseconds

# Get the output tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

# Normalize the output data if needed
output_data = output_data.astype('float32') / 256.0

# Apply softmax if necessary
probabilities = tf.nn.softmax(output_data[0]).numpy()

# Print raw output and predicted class
print("Raw Model Output (Logits):")
print(output_data[0])

predicted_class = np.argmax(probabilities)

print(f"Predicted Class: {predicted_class}")
print(f"Ground Truth Label: {label}")

# Print inference time
print(f"Inference Time: {inference_time:.2f} ms")


Raw Model Output (Logits):
[0.         0.         0.         0.         0.984375   0.
 0.         0.0078125  0.00390625 0.00390625]
Predicted Class: 4
Ground Truth Label: 4
Inference Time: 0.28 ms


In [17]:
#quantized gia 10 input
import time
import numpy as np
import tensorflow as tf

# Load the TFLite model
tflite_model_path = 'quantized_model.tflite'  # Replace with your model's path
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)

# Allocate tensors (this will initialize the interpreter and load the model)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get the shape and dtype of the input tensor
input_shape = input_details[0]['shape']
input_dtype = input_details[0]['dtype']

# Load test data
for i in range (1,11):
  test_images, test_labels = load_mnist(i)  # Ensure load_mnist is defined
  image = test_images  # Select first image
  label = test_labels  # Corresponding label

# Set the input tensor
  interpreter.set_tensor(input_details[0]['index'], image)

# Measure inference time
  start_time = time.time()  # Start timer
  interpreter.invoke()       # Run inference
  end_time = time.time()     # End timer

# Compute elapsed time
  inference_time += (end_time - start_time) * 1000  # Convert to milliseconds

# Get the output tensor
  output_data = interpreter.get_tensor(output_details[0]['index'])

# Normalize the output data
  output_data = output_data.astype('float32') / 256.0

# Apply softmax to get probabilities
  probabilities = tf.nn.softmax(output_data[0]).numpy()

# Print raw output and predicted class
  #print("Raw Model Output (Logits):")
  #print(output_data[0])

  predicted_class = np.argmax(probabilities)
  print(f"Predicted Class: {predicted_class}")
  print(f"Ground Truth Label: {label}")

# Print inference time
print(f"Inference Time: {inference_time/(i):.2f} ms")

Predicted Class: 2
Ground Truth Label: 2
Predicted Class: 1
Ground Truth Label: 1
Predicted Class: 0
Ground Truth Label: 0
Predicted Class: 4
Ground Truth Label: 4
Predicted Class: 1
Ground Truth Label: 1
Predicted Class: 4
Ground Truth Label: 4
Predicted Class: 9
Ground Truth Label: 9
Predicted Class: 5
Ground Truth Label: 5
Predicted Class: 9
Ground Truth Label: 9
Predicted Class: 0
Ground Truth Label: 0
Inference Time: 0.16 ms


In [18]:
#aplo gia 10 input
#RUN MODEL
import time
import numpy as np
import tensorflow as tf

# Load the TFLite model
tflite_model_path = 'model.tflite'  # Replace with your model's path
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)

# Allocate tensors (this will initialize the interpreter and load the model)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get the shape and dtype of the input tensor
input_shape = input_details[0]['shape']
input_dtype = input_details[0]['dtype']
inference_time = 0

# Load test data (assuming load_mnist function exists)
for i in range (1,10):
  test_images, test_labels = load_mnist(i)  # Ensure load_mnist is defined
  image = test_images.astype(np.float32) / 255.0  # Normalize to [0,1] for float32 model
  label = test_labels  # Corresponding label

# Ensure input tensor has the correct shape and type
  image = np.reshape(image, input_shape).astype(np.float32)  # Explicitly cast to float32

# Set the input tensor
  interpreter.set_tensor(input_details[0]['index'], image)

# Measure inference time
  start_time = time.time()  # Start timer
  interpreter.invoke()       # Run inference
  end_time = time.time()     # End timer

  # Compute elapsed time
  inference_time += (end_time - start_time) * 1000  # Convert to milliseconds

# Get the output tensor
  output_data = interpreter.get_tensor(output_details[0]['index'])

# Apply softmax to get probabilities
  probabilities = tf.nn.softmax(output_data[0]).numpy()

# Print raw output and predicted class
  #print("Raw Model Output (Logits):")
  #print(output_data[0])

  predicted_class = np.argmax(probabilities)
  print(f"Predicted Class: {predicted_class}")
  print(f"Ground Truth Label: {label}")

# Print inference time
print(f"Inference Time: {inference_time/(i):.2f} ms")

Predicted Class: 2
Ground Truth Label: 2
Predicted Class: 1
Ground Truth Label: 1
Predicted Class: 0
Ground Truth Label: 0
Predicted Class: 4
Ground Truth Label: 4
Predicted Class: 1
Ground Truth Label: 1
Predicted Class: 4
Ground Truth Label: 4
Predicted Class: 9
Ground Truth Label: 9
Predicted Class: 5
Ground Truth Label: 5
Predicted Class: 9
Ground Truth Label: 9
Inference Time: 0.12 ms


In [19]:
#aplo gia 1 input
#RUN MODEL
import time
import numpy as np
import tensorflow as tf

# Load the TFLite model
tflite_model_path = 'model.tflite'  # Replace with your model's path
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)

# Allocate tensors (this will initialize the interpreter and load the model)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get the shape and dtype of the input tensor
input_shape = input_details[0]['shape']
input_dtype = input_details[0]['dtype']
inference_time = 0

# Load test data (assuming load_mnist function exists)

test_images, test_labels = load_mnist(1)  # Ensure load_mnist is defined
image = test_images.astype(np.float32) / 255.0  # Normalize to [0,1] for float32 model
label = test_labels  # Corresponding label

# Ensure input tensor has the correct shape and type
image = np.reshape(image, input_shape).astype(np.float32)  # Explicitly cast to float32

# Set the input tensor
interpreter.set_tensor(input_details[0]['index'], image)

# Measure inference time
start_time = time.time()  # Start timer
interpreter.invoke()       # Run inference
end_time = time.time()     # End timer

# Compute elapsed time
inference_time += (end_time - start_time) * 1000  # Convert to milliseconds

# Get the output tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

# Apply softmax to get probabilities
probabilities = tf.nn.softmax(output_data[0]).numpy()

# Print raw output and predicted class
#print("Raw Model Output (Logits):")
#print(output_data[0])

predicted_class = np.argmax(probabilities)
print(f"Predicted Class: {predicted_class}")
print(f"Ground Truth Label: {label}")

# Print inference time
print(f"Inference Time: {inference_time:.2f} ms")

Predicted Class: 2
Ground Truth Label: 2
Inference Time: 0.18 ms
