<a href="https://colab.research.google.com/github/amelft81/EmbeddedAI/blob/main/Model_Quantization_and_Pruning_with_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import os
import numpy as np

# --- Configuration ---
# Path of the Keras .h5 file that section 1 tries to load as the baseline model.
MOBILENET_V2_MODEL_PATH = 'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224 (1).h5'
# NOTE(review): SIMPLE_MODEL_PATH is not referenced anywhere else in the visible code;
# the pruning and QAT sections build their own small models in memory instead.
SIMPLE_MODEL_PATH = 'simple_embedded_model.h5'

# Directory that receives every saved/converted model below.
# Created up front so later writes do not fail on a missing folder.
OUTPUT_DIR = 'optimized_models'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Define a representative dataset for full integer quantization
# This should be a generator or a list of your actual input data
# For demonstration, we'll use dummy data.
def representative_dataset_gen(num_samples=100, input_shape=(224, 224, 3)):
    """
    Yield calibration samples for post-training integer quantization.

    The function is assigned to ``converter.representative_dataset`` and is
    therefore called without arguments; the defaults reproduce the original
    behaviour of 100 samples shaped (1, 224, 224, 3).

    The samples are uniform random noise in [0, 1) and exist only so the
    demonstration runs end to end. Replace this with your actual data
    loading logic before relying on the resulting quantization ranges.

    Args:
        num_samples: Number of samples to yield.
        input_shape: Shape of a single input, without the batch dimension.

    Yields:
        A one-element list holding a float32 array of shape
        ``(1, *input_shape)``.
    """
    # A leading batch dimension of 1 is added to every sample.
    batch_shape = (1, *input_shape)
    for _ in range(num_samples):
        data = np.random.rand(*batch_shape).astype(np.float32)
        yield [data]

# --- Helper function to get model size ---
def get_gzipped_model_size(file_path):
    """
    Return the compressed size of a model file in bytes.

    Despite the name, the file is packed into a temporary ``.zip`` archive
    using DEFLATE compression, and the size of that archive (container
    overhead included) is returned. The temporary archive is deleted
    before returning.

    Args:
        file_path: Path of the model file to measure.

    Returns:
        Size in bytes of the zip archive containing ``file_path``.
    """
    # Imported locally so the helper works even when the surrounding cell
    # has not imported these modules.
    import tempfile
    import zipfile

    fd, zipped_file = tempfile.mkstemp('.zip')
    # mkstemp returns an open OS-level descriptor; close it right away,
    # because ZipFile reopens the path itself.
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file_path, os.path.basename(file_path))
        return os.path.getsize(zipped_file)
    finally:
        # Do not leave one stray archive behind per call.
        os.remove(zipped_file)

# --- 1. Load the Baseline Model ---
def _load_baseline_model(model_path):
    """
    Load the baseline Keras model stored at ``model_path``.

    A full-model .h5 file is loaded directly. If the file exists but
    ``load_model`` rejects it with a ValueError (as happens for weights-only
    files such as the Keras-applications MobileNetV2 download), the
    MobileNetV2 architecture is built and the file is loaded as its weights.

    Args:
        model_path: Path to a Keras .h5 file.

    Returns:
        A ``tf.keras.Model``.

    Raises:
        Exception: Whatever the loaders raise when the file is missing or
            does not match; the caller falls back to a dummy model.
    """
    try:
        return tf.keras.models.load_model(model_path)
    except ValueError:
        # A missing file is not a "weights-only" situation; keep the
        # original error so the message stays meaningful.
        if not os.path.isfile(model_path):
            raise
        return tf.keras.applications.MobileNetV2(weights=model_path)


print(f"Loading baseline model from: {MOBILENET_V2_MODEL_PATH}")
try:
    baseline_model = _load_baseline_model(MOBILENET_V2_MODEL_PATH)
    baseline_model.summary()
    baseline_model_size = os.path.getsize(MOBILENET_V2_MODEL_PATH)
    print(f"Baseline model size (raw): {baseline_model_size / 1024:.2f} KB")
except Exception as e:
    # Deliberate best-effort: the rest of the demonstration only needs *a*
    # Keras model taking (224, 224, 3) inputs, so any load failure is
    # reported and replaced by a small stand-in network.
    print(f"Error loading baseline MobileNetV2 model: {e}")
    print("Creating a simple dummy model for further demonstrations.")
    baseline_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    baseline_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # The dummy model is saved only so that an on-disk baseline size exists
    # for the comparison in the final summary.
    dummy_model_path = os.path.join(OUTPUT_DIR, "dummy_baseline_model.h5")
    baseline_model.save(dummy_model_path)
    baseline_model_size = os.path.getsize(dummy_model_path)
    print(f"Dummy Baseline model size (raw): {baseline_model_size / 1024:.2f} KB")


# --- 2. Post-Training Quantization ---
# Both conversions below start from the in-memory `baseline_model` and write a
# .tflite file into OUTPUT_DIR; the raw file sizes are reused by the final summary.

# 2a. Dynamic range: only Optimize.DEFAULT is set, no representative dataset is supplied.
print("\n--- Applying Post-Training Dynamic Range Quantization ---")
converter_dr = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter_dr.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_dr = converter_dr.convert()

dr_model_path = os.path.join(OUTPUT_DIR, 'quantized_dynamic_range_model.tflite')
with open(dr_model_path, 'wb') as f:
    f.write(tflite_model_dr)

dr_model_size = os.path.getsize(dr_model_path)
print(f"Dynamic Range Quantized model saved to: {dr_model_path}")
print(f"Dynamic Range Quantized model size (raw): {dr_model_size / 1024:.2f} KB")
# print(f"Dynamic Range Quantized model size (gzipped): {get_gzipped_model_size(dr_model_path) / 1024:.2f} KB")


# 2b. Integer quantization calibrated with the representative dataset generator.
print("\n--- Applying Post-Training Full Integer Quantization ---")
converter_int = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter_int.optimizations = [tf.lite.Optimize.DEFAULT]
converter_int.representative_dataset = representative_dataset_gen
# Restrict the converter to the INT8 builtin op set.
# NOTE(review): per the TFLite post-training quantization guide this makes
# conversion raise for ops that cannot be quantized rather than falling back
# to float, which is the failure the try/except below reports. Confirm
# against the installed TensorFlow version.
converter_int.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# The model's input and output tensors are explicitly kept as float32, so
# callers still feed and read floats.
# NOTE(review): an integer-only interface would need tf.int8 / tf.uint8 here;
# confirm whether the target device requires that.
converter_int.inference_input_type = tf.float32 # Input type to the model
converter_int.inference_output_type = tf.float32 # Output type from the model

# int_model_path / int_model_size are only bound when conversion and the file
# write succeed; the summary section checks for int_model_size before printing.
try:
    tflite_model_int = converter_int.convert()
    int_model_path = os.path.join(OUTPUT_DIR, 'quantized_full_integer_model.tflite')
    with open(int_model_path, 'wb') as f:
        f.write(tflite_model_int)

    int_model_size = os.path.getsize(int_model_path)
    print(f"Full Integer Quantized model saved to: {int_model_path}")
    print(f"Full Integer Quantized model size (raw): {int_model_size / 1024:.2f} KB")
    # print(f"Full Integer Quantized model size (gzipped): {get_gzipped_model_size(int_model_path) / 1024:.2f} KB")
except Exception as e:
    # Best-effort: a failed INT8 conversion is reported and the script continues.
    print(f"Full Integer Quantization failed. This might happen if the model operations are not fully supported for INT8. Error: {e}")

# --- 3. Pruning (Requires retraining) ---

print("\n--- Applying Pruning (Demonstration with a simple model) ---")
# For pruning, a small model is built and trained with pruning wrappers.
pruning_model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Dummy data for training demonstration (random images, random labels 0-9).
# Replace with a real training/validation split. These arrays are reused by
# the QAT section further down.
train_images = np.random.rand(100, 224, 224, 3).astype(np.float32)
train_labels = np.random.randint(0, 10, 100)
test_images = np.random.rand(20, 224, 224, 3).astype(np.float32)
test_labels = np.random.randint(0, 10, 20)

# Derive the pruning schedule from the amount of training that actually
# happens. A fixed end_step=1000 with the default mask-update frequency of
# 100 only ever prunes at step 0 in a run this short, so the model would stay
# at initial_sparsity and never approach final_sparsity.
pruning_epochs = 2 # Keep low for demonstration
pruning_batch_size = 32 # Same as the Keras default; explicit so the step count is exact
pruning_steps_per_epoch = int(np.ceil(len(train_images) / pruning_batch_size))
pruning_end_step = pruning_steps_per_epoch * pruning_epochs
# Update masks about ten times over the run, at most every 100 steps and at
# least every step.
pruning_frequency = max(1, min(100, pruning_end_step // 10))

# Define pruning parameters
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=pruning_end_step,
        frequency=pruning_frequency
    )
}

# Apply pruning to the model
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(pruning_model, **pruning_params)
pruned_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
pruned_model.summary()

print("Training pruned model (using dummy data)...")
# You would replace this with your actual training data and more epochs.
# UpdatePruningStep advances the pruning step counter during training.
pruned_model.fit(
    train_images,
    train_labels,
    batch_size=pruning_batch_size,
    epochs=pruning_epochs,
    validation_data=(test_images, test_labels),
    callbacks=[tfmot.sparsity.keras.UpdatePruningStep()]
)

# Strip pruning wrappers for inference
model_for_export = tfmot.sparsity.keras.strip_pruning(pruned_model)
pruned_keras_path = os.path.join(OUTPUT_DIR, 'pruned_keras_model.h5')
model_for_export.save(pruned_keras_path, include_optimizer=False)
print(f"Pruned Keras model saved to: {pruned_keras_path}")
pruned_keras_size = os.path.getsize(pruned_keras_path)
print(f"Pruned Keras model size (raw): {pruned_keras_size / 1024:.2f} KB")

# Convert pruned Keras model to TFLite (no quantization is applied here).
converter_pruned = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
tflite_pruned_model = converter_pruned.convert()

pruned_tflite_path = os.path.join(OUTPUT_DIR, 'pruned_tflite_model.tflite')
with open(pruned_tflite_path, 'wb') as f:
    f.write(tflite_pruned_model)

# NOTE: the raw file size barely changes after pruning (see the recorded
# output); the saving only shows once the file is compressed.
pruned_tflite_size = os.path.getsize(pruned_tflite_path)
print(f"Pruned TFLite model saved to: {pruned_tflite_path}")
print(f"Pruned TFLite model size (raw): {pruned_tflite_size / 1024:.2f} KB")
# print(f"Pruned TFLite model size (gzipped): {get_gzipped_model_size(pruned_tflite_path) / 1024:.2f} KB")


# --- 4. Quantization-Aware Training (QAT) ---

print("\n--- Applying Quantization-Aware Training (Demonstration with a simple model) ---")
# A fresh small CNN (same layer stack as the pruning demo) is built here,
# so QAT starts from untrained weights rather than from `baseline_model`.
qat_model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Apply quantization-aware training wrappers
quant_aware_model = tfmot.quantization.keras.quantize_model(qat_model)
quant_aware_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
quant_aware_model.summary()

print("Training quantization-aware model (using dummy data)...")
# Reuses the random train/test arrays created in the pruning section.
# You would replace this with your actual training data and more epochs
quant_aware_model.fit(
    train_images,
    train_labels,
    epochs=2, # Keep low for demonstration
    validation_data=(test_images, test_labels)
)

# Convert QAT model to TFLite
# NOTE(review): presumably Optimize.DEFAULT lets the converter use the
# quantization parameters learned during QAT; confirm against the tfmot QAT guide.
converter_qat = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter_qat.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_qat_model = converter_qat.convert()

qat_tflite_path = os.path.join(OUTPUT_DIR, 'qat_tflite_model.tflite')
with open(qat_tflite_path, 'wb') as f:
    f.write(tflite_qat_model)

qat_tflite_size = os.path.getsize(qat_tflite_path)
print(f"Quantization-Aware Trained TFLite model saved to: {qat_tflite_path}")
print(f"Quantization-Aware Trained TFLite model size (raw): {qat_tflite_size / 1024:.2f} KB")
# print(f"Quantization-Aware Trained TFLite model size (gzipped): {get_gzipped_model_size(qat_tflite_path) / 1024:.2f} KB")

# Final comparison of the raw on-disk sizes collected by the sections above.
print("\n--- Summary of Model Sizes (Raw .tflite) ---")
print(f"Baseline model (original .h5): {baseline_model_size / 1024:.2f} KB")
print(f"Dynamic Range Quantized TFLite: {dr_model_size / 1024:.2f} KB")
# int_model_size only exists when the integer conversion succeeded.
# NOTE(review): at top level this checks the module/notebook namespace, so a
# value left over from an earlier run would also pass this test.
if 'int_model_size' in locals():
    print(f"Full Integer Quantized TFLite: {int_model_size / 1024:.2f} KB")
print(f"Pruned TFLite: {pruned_tflite_size / 1024:.2f} KB")
print(f"Quantization-Aware Trained TFLite: {qat_tflite_size / 1024:.2f} KB")

Loading baseline model from: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224 (1).h5
Error loading baseline MobileNetV2 model: No file or directory found at mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224 (1).h5
Creating a simple dummy model for further demonstrations.
Dummy Baseline model size (raw): 15422.55 KB

--- Applying Post-Training Dynamic Range Quantization ---


  saving_api.save_model(


Dynamic Range Quantized model saved to: optimized_models/quantized_dynamic_range_model.tflite
Dynamic Range Quantized model size (raw): 3856.03 KB

--- Applying Post-Training Full Integer Quantization ---




Full Integer Quantized model saved to: optimized_models/quantized_full_integer_model.tflite
Full Integer Quantized model size (raw): 3854.87 KB

--- Applying Pruning (Demonstration with a simple model) ---
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 222, 222, 32)      1762      
 _7 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 111, 111, 32)      1         
 oling2d_7 (PruneLowMagnitu                                      
 de)                                                             
                                                                 
 prune_low_magnitude_flatte  (None, 394272)            1         
 n_7 (PruneLowMagnitude)                                         
                                              



Epoch 2/2


  saving_api.save_model(


Pruned Keras model saved to: optimized_models/pruned_keras_model.h5
Pruned Keras model size (raw): 15421.12 KB
Pruned TFLite model saved to: optimized_models/pruned_tflite_model.tflite
Pruned TFLite model size (raw): 15407.12 KB

--- Applying Quantization-Aware Training (Demonstration with a simple model) ---
Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_2 (Quantize  (None, 224, 224, 3)       3         
 Layer)                                                          
                                                                 
 quant_conv2d_8 (QuantizeWr  (None, 222, 222, 32)      963       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_8 (Qua  (None, 111, 111, 32)      1         
 ntizeWrapperV2)                                                 
       



Epoch 2/2
Quantization-Aware Trained TFLite model saved to: optimized_models/qat_tflite_model.tflite
Quantization-Aware Trained TFLite model size (raw): 3855.23 KB

--- Summary of Model Sizes (Raw .tflite) ---
Baseline model (original .h5): 15422.55 KB
Dynamic Range Quantized TFLite: 3856.03 KB
Full Integer Quantized TFLite: 3854.87 KB
Pruned TFLite: 15407.12 KB
Quantization-Aware Trained TFLite: 3855.23 KB




In [6]:
!pip install -q tensorflow-model-optimization

In [4]:
import tensorflow as tf
import tensorflow_model_optimization as tfmot # Import after installation
import os
import numpy as np
import tempfile # Added missing import

# --- Configuration ---
# Path of the Keras .h5 file that section 1 tries to load as the baseline model.
# Ensure this file exists or update the path; otherwise a dummy model is used.
MOBILENET_V2_MODEL_PATH = 'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224 (1).h5'
# NOTE(review): SIMPLE_MODEL_PATH is not referenced anywhere else in the visible code;
# the pruning and QAT sections build their own small models in memory instead.
SIMPLE_MODEL_PATH = 'simple_embedded_model.h5'

# Directory that receives every saved/converted model below.
# Created up front so later writes do not fail on a missing folder.
OUTPUT_DIR = 'optimized_models'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Define a representative dataset for full integer quantization
# This should be a generator or a list of your actual input data
# For demonstration, we'll use dummy data.
def representative_dataset_gen(num_samples=100, input_shape=(224, 224, 3)):
    """
    Yield calibration samples for post-training integer quantization.

    The function is assigned to ``converter.representative_dataset`` and is
    therefore called without arguments; the defaults reproduce the original
    behaviour of 100 samples shaped (1, 224, 224, 3).

    The samples are uniform random noise in [0, 1) and exist only so the
    demonstration runs end to end. Replace this with your actual data
    loading logic before relying on the resulting quantization ranges.

    Args:
        num_samples: Number of samples to yield.
        input_shape: Shape of a single input, without the batch dimension.

    Yields:
        A one-element list holding a float32 array of shape
        ``(1, *input_shape)``.
    """
    # A leading batch dimension of 1 is added to every sample.
    batch_shape = (1, *input_shape)
    for _ in range(num_samples):
        data = np.random.rand(*batch_shape).astype(np.float32)
        yield [data]

# --- Helper function to get model size ---
def get_gzipped_model_size(file_path):
    """
    Return the compressed size of a model file in bytes.

    Despite the name, the file is packed into a temporary ``.zip`` archive
    using DEFLATE compression, and the size of that archive (container
    overhead included) is returned. The temporary archive is deleted
    before returning.

    Args:
        file_path: Path of the model file to measure.

    Returns:
        Size in bytes of the zip archive containing ``file_path``.
    """
    import zipfile

    fd, zipped_file = tempfile.mkstemp('.zip')
    # mkstemp returns an open OS-level descriptor; close it right away,
    # because ZipFile reopens the path itself.
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file_path, os.path.basename(file_path))
        return os.path.getsize(zipped_file)
    finally:
        # Do not leave one stray archive behind per call.
        os.remove(zipped_file)

# --- 1. Load the Baseline Model ---
def _load_baseline_model(model_path):
    """
    Load the baseline Keras model stored at ``model_path``.

    A full-model .h5 file is loaded directly. If the file exists but
    ``load_model`` rejects it with a ValueError (as happens for weights-only
    files such as the Keras-applications MobileNetV2 download), the
    MobileNetV2 architecture is built and the file is loaded as its weights.

    Args:
        model_path: Path to a Keras .h5 file.

    Returns:
        A ``tf.keras.Model``.

    Raises:
        Exception: Whatever the loaders raise when the file is missing or
            does not match; the caller falls back to a dummy model.
    """
    try:
        return tf.keras.models.load_model(model_path)
    except ValueError:
        # A missing file is not a "weights-only" situation; keep the
        # original error so the message stays meaningful.
        if not os.path.isfile(model_path):
            raise
        return tf.keras.applications.MobileNetV2(weights=model_path)


print(f"Loading baseline model from: {MOBILENET_V2_MODEL_PATH}")
try:
    baseline_model = _load_baseline_model(MOBILENET_V2_MODEL_PATH)
    baseline_model.summary()
    baseline_model_size = os.path.getsize(MOBILENET_V2_MODEL_PATH)
    print(f"Baseline model size (raw): {baseline_model_size / 1024:.2f} KB")
except Exception as e:
    # Deliberate best-effort: the rest of the demonstration only needs *a*
    # Keras model taking (224, 224, 3) inputs, so any load failure is
    # reported and replaced by a small stand-in network.
    print(f"Error loading baseline MobileNetV2 model: {e}")
    print("Creating a simple dummy model for further demonstrations.")
    baseline_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    baseline_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # The dummy model is saved only so that an on-disk baseline size exists
    # for the comparison in the final summary.
    dummy_model_path = os.path.join(OUTPUT_DIR, "dummy_baseline_model.h5")
    baseline_model.save(dummy_model_path)
    baseline_model_size = os.path.getsize(dummy_model_path)
    print(f"Dummy Baseline model size (raw): {baseline_model_size / 1024:.2f} KB")


# --- 2. Post-Training Quantization ---
# Both conversions below start from the in-memory `baseline_model` and write a
# .tflite file into OUTPUT_DIR; the raw file sizes are reused by the final summary.

# 2a. Dynamic range: only Optimize.DEFAULT is set, no representative dataset is supplied.
print("\n--- Applying Post-Training Dynamic Range Quantization ---")
converter_dr = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter_dr.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_dr = converter_dr.convert()

dr_model_path = os.path.join(OUTPUT_DIR, 'quantized_dynamic_range_model.tflite')
with open(dr_model_path, 'wb') as f:
    f.write(tflite_model_dr)

dr_model_size = os.path.getsize(dr_model_path)
print(f"Dynamic Range Quantized model saved to: {dr_model_path}")
print(f"Dynamic Range Quantized model size (raw): {dr_model_size / 1024:.2f} KB")
# print(f"Dynamic Range Quantized model size (gzipped): {get_gzipped_model_size(dr_model_path) / 1024:.2f} KB")


# 2b. Integer quantization calibrated with the representative dataset generator.
print("\n--- Applying Post-Training Full Integer Quantization ---")
converter_int = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter_int.optimizations = [tf.lite.Optimize.DEFAULT]
converter_int.representative_dataset = representative_dataset_gen
# Restrict the converter to the INT8 builtin op set.
# NOTE(review): per the TFLite post-training quantization guide this makes
# conversion raise for ops that cannot be quantized rather than falling back
# to float, which is the failure the try/except below reports. Confirm
# against the installed TensorFlow version.
converter_int.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# inference_input_type / inference_output_type are deliberately not set here.
# NOTE(review): per the TFLite converter docs they default to tf.float32, so
# the model's input and output presumably stay float; set both to tf.int8
# (or tf.uint8) if the target needs integer-only I/O. Confirm.

# int_model_path / int_model_size are only bound when conversion and the file
# write succeed; the summary section checks for int_model_size before printing.
try:
    tflite_model_int = converter_int.convert()
    int_model_path = os.path.join(OUTPUT_DIR, 'quantized_full_integer_model.tflite')
    with open(int_model_path, 'wb') as f:
        f.write(tflite_model_int)

    int_model_size = os.path.getsize(int_model_path)
    print(f"Full Integer Quantized model saved to: {int_model_path}")
    print(f"Full Integer Quantized model size (raw): {int_model_size / 1024:.2f} KB")
    # print(f"Full Integer Quantized model size (gzipped): {get_gzipped_model_size(int_model_path) / 1024:.2f} KB")
except Exception as e:
    # Best-effort: a failed INT8 conversion is reported and the script continues.
    print(f"Full Integer Quantization failed. This might happen if the model operations are not fully supported for INT8. Error: {e}")


# --- 3. Pruning (Requires retraining) ---

print("\n--- Applying Pruning (Demonstration with a simple model) ---")
# For pruning, a small model is built and trained with pruning wrappers.
pruning_model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Dummy data for training demonstration (random images, random labels 0-9).
# Replace with a real training/validation split. These arrays are reused by
# the QAT section further down.
train_images = np.random.rand(100, 224, 224, 3).astype(np.float32)
train_labels = np.random.randint(0, 10, 100)
test_images = np.random.rand(20, 224, 224, 3).astype(np.float32)
test_labels = np.random.randint(0, 10, 20)

# Derive the pruning schedule from the amount of training that actually
# happens. A fixed end_step=1000 with the default mask-update frequency of
# 100 only ever prunes at step 0 in a run this short, so the model would stay
# at initial_sparsity and never approach final_sparsity.
pruning_epochs = 2 # Keep low for demonstration
pruning_batch_size = 32 # Same as the Keras default; explicit so the step count is exact
pruning_steps_per_epoch = int(np.ceil(len(train_images) / pruning_batch_size))
pruning_end_step = pruning_steps_per_epoch * pruning_epochs
# Update masks about ten times over the run, at most every 100 steps and at
# least every step.
pruning_frequency = max(1, min(100, pruning_end_step // 10))

# Define pruning parameters
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=pruning_end_step,
        frequency=pruning_frequency
    )
}

# Apply pruning to the model
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(pruning_model, **pruning_params)
pruned_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
pruned_model.summary()

print("Training pruned model (using dummy data)...")
# You would replace this with your actual training data and more epochs.
# UpdatePruningStep advances the pruning step counter during training.
pruned_model.fit(
    train_images,
    train_labels,
    batch_size=pruning_batch_size,
    epochs=pruning_epochs,
    validation_data=(test_images, test_labels),
    callbacks=[tfmot.sparsity.keras.UpdatePruningStep()]
)

# Strip pruning wrappers for inference
model_for_export = tfmot.sparsity.keras.strip_pruning(pruned_model)
pruned_keras_path = os.path.join(OUTPUT_DIR, 'pruned_keras_model.h5')
model_for_export.save(pruned_keras_path, include_optimizer=False)
print(f"Pruned Keras model saved to: {pruned_keras_path}")
pruned_keras_size = os.path.getsize(pruned_keras_path)
print(f"Pruned Keras model size (raw): {pruned_keras_size / 1024:.2f} KB")

# Convert pruned Keras model to TFLite (no quantization is applied here).
converter_pruned = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
tflite_pruned_model = converter_pruned.convert()

pruned_tflite_path = os.path.join(OUTPUT_DIR, 'pruned_tflite_model.tflite')
with open(pruned_tflite_path, 'wb') as f:
    f.write(tflite_pruned_model)

# NOTE: the raw file size barely changes after pruning (see the recorded
# output); the saving only shows once the file is compressed.
pruned_tflite_size = os.path.getsize(pruned_tflite_path)
print(f"Pruned TFLite model saved to: {pruned_tflite_path}")
print(f"Pruned TFLite model size (raw): {pruned_tflite_size / 1024:.2f} KB")
# print(f"Pruned TFLite model size (gzipped): {get_gzipped_model_size(pruned_tflite_path) / 1024:.2f} KB")


# --- 4. Quantization-Aware Training (QAT) ---

print("\n--- Applying Quantization-Aware Training (Demonstration with a simple model) ---")
# A fresh small CNN (same layer stack as the pruning demo) is built here,
# so QAT starts from untrained weights rather than from `baseline_model`.
qat_model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Apply quantization-aware training wrappers
quant_aware_model = tfmot.quantization.keras.quantize_model(qat_model)
quant_aware_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
quant_aware_model.summary()

print("Training quantization-aware model (using dummy data)...")
# Reuses the random train/test arrays created in the pruning section.
# You would replace this with your actual training data and more epochs
quant_aware_model.fit(
    train_images,
    train_labels,
    epochs=2, # Keep low for demonstration
    validation_data=(test_images, test_labels)
)

# Convert QAT model to TFLite
# NOTE(review): presumably Optimize.DEFAULT lets the converter use the
# quantization parameters learned during QAT; confirm against the tfmot QAT guide.
converter_qat = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter_qat.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_qat_model = converter_qat.convert()

qat_tflite_path = os.path.join(OUTPUT_DIR, 'qat_tflite_model.tflite')
with open(qat_tflite_path, 'wb') as f:
    f.write(tflite_qat_model)

qat_tflite_size = os.path.getsize(qat_tflite_path)
print(f"Quantization-Aware Trained TFLite model saved to: {qat_tflite_path}")
print(f"Quantization-Aware Trained TFLite model size (raw): {qat_tflite_size / 1024:.2f} KB")
# print(f"Quantization-Aware Trained TFLite model size (gzipped): {get_gzipped_model_size(qat_tflite_path) / 1024:.2f} KB")

# Final comparison of the raw on-disk sizes collected by the sections above.
print("\n--- Summary of Model Sizes (Raw .tflite) ---")
print(f"Baseline model (original .h5): {baseline_model_size / 1024:.2f} KB")
print(f"Dynamic Range Quantized TFLite: {dr_model_size / 1024:.2f} KB")
# int_model_size only exists when the integer conversion succeeded.
# NOTE(review): at top level this checks the module/notebook namespace, so a
# value left over from an earlier run would also pass this test.
if 'int_model_size' in locals():
    print(f"Full Integer Quantized TFLite: {int_model_size / 1024:.2f} KB")
print(f"Pruned TFLite: {pruned_tflite_size / 1024:.2f} KB")
print(f"Quantization-Aware Trained TFLite: {qat_tflite_size / 1024:.2f} KB")

Loading baseline model from: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224 (1).h5
Error loading baseline MobileNetV2 model: No file or directory found at mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224 (1).h5
Creating a simple dummy model for further demonstrations.
Dummy Baseline model size (raw): 15422.55 KB

--- Applying Post-Training Dynamic Range Quantization ---


  saving_api.save_model(


Dynamic Range Quantized model saved to: optimized_models/quantized_dynamic_range_model.tflite
Dynamic Range Quantized model size (raw): 3856.03 KB

--- Applying Post-Training Full Integer Quantization ---




Full Integer Quantized model saved to: optimized_models/quantized_full_integer_model.tflite
Full Integer Quantized model size (raw): 3854.87 KB

--- Applying Pruning (Demonstration with a simple model) ---
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 222, 222, 32)      1762      
 _4 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 111, 111, 32)      1         
 oling2d_4 (PruneLowMagnitu                                      
 de)                                                             
                                                                 
 prune_low_magnitude_flatte  (None, 394272)            1         
 n_4 (PruneLowMagnitude)                                         
                                              

  saving_api.save_model(


Pruned Keras model saved to: optimized_models/pruned_keras_model.h5
Pruned Keras model size (raw): 15421.12 KB
Pruned TFLite model saved to: optimized_models/pruned_tflite_model.tflite
Pruned TFLite model size (raw): 15407.12 KB

--- Applying Quantization-Aware Training (Demonstration with a simple model) ---
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_1 (Quantize  (None, 224, 224, 3)       3         
 Layer)                                                          
                                                                 
 quant_conv2d_5 (QuantizeWr  (None, 222, 222, 32)      963       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_5 (Qua  (None, 111, 111, 32)      1         
 ntizeWrapperV2)                                                 
       



Quantization-Aware Trained TFLite model saved to: optimized_models/qat_tflite_model.tflite
Quantization-Aware Trained TFLite model size (raw): 3855.23 KB

--- Summary of Model Sizes (Raw .tflite) ---
Baseline model (original .h5): 15422.55 KB
Dynamic Range Quantized TFLite: 3856.03 KB
Full Integer Quantized TFLite: 3854.87 KB
Pruned TFLite: 15407.12 KB
Quantization-Aware Trained TFLite: 3855.23 KB
