<a href="https://colab.research.google.com/github/amelft81/EmbeddedAI/blob/main/Python_Code_for_Model_Quantization_Only.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import os
import numpy as np

# --- Configuration ---
# Location of the Keras model to quantize. The default is the Colab path
# '/content/simple_embedded_model.h5'; change it if your file lives elsewhere.
SIMPLE_MODEL_PATH = "/content/simple_embedded_model.h5"

# Directory that receives the optimized models. It is created when this
# module is executed; an already existing directory is left untouched.
OUTPUT_DIR = "optimized_models"
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

# Define a representative dataset for full integer quantization.
# In real use it should yield samples of your actual input data: the
# converter runs them through the model to calibrate the quantization
# ranges of the activations.
def representative_dataset_gen(model_input_shape, num_samples=100):
    """
    Yield dummy calibration samples for full integer quantization.

    Each sample is a one-element list holding a float32 array of shape
    (1, *model_input_shape) filled with uniform random values in [0, 1).
    REPLACE THIS WITH YOUR ACTUAL DATA LOADING LOGIC: random data only
    exercises the conversion pipeline, it does not calibrate realistically.

    Args:
        model_input_shape: Input shape without the batch dimension,
            e.g. (10,). Dimensions given as None (unknown/dynamic size)
            are replaced by 1 so that a concrete array can be built.
        num_samples (int): Number of samples to yield.

    Yields:
        list: A single-element list containing one float32 numpy array
        with a leading batch dimension of 1.
    """
    # np.random.rand needs concrete integer sizes; a None dimension would
    # raise a TypeError, so substitute the smallest valid size for it.
    sample_shape = (1,) + tuple(
        1 if dim is None else dim for dim in model_input_shape
    )
    for _ in range(num_samples):
        yield [np.random.rand(*sample_shape).astype(np.float32)]

def _load_model_with_fallback(model_path):
    """
    Load the Keras model, rebuilding a known architecture if direct loading fails.

    Args:
        model_path (str): Path to the original .h5 model file.

    Returns:
        The loaded model, or None when both loading attempts fail (the
        errors are printed, not raised).
    """
    import re  # only needed to parse the loader's error message below

    try:
        # Attempt to load the model directly
        model = tf.keras.models.load_model(model_path)
        print(f"Original model '{os.path.basename(model_path)}' loaded successfully.")
        model.summary()
        return model
    except Exception as e:
        print(f"ERROR: Could not load model from '{model_path}'.")
        print(f"Details: {e}")
        # Keep the message: the exception name is cleared when this block ends.
        error_text = str(e)

    print("Attempting to load with specific architecture for simple_embedded_model.h5...")
    try:
        # Fallback architecture: InputLayer -> Dense(8, relu) -> Dense(1, sigmoid).
        # Default to 10 input features; if the error message quotes the saved
        # config's batch_shape, use the feature count it reports instead.
        input_shape_for_dummy = (10,)
        match = re.search(r"'batch_shape': \[None, (\d+)\]", error_text)
        if match:
            input_shape_for_dummy = (int(match.group(1)),)

        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=input_shape_for_dummy),
            tf.keras.layers.Dense(8, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])
        model.load_weights(model_path)
        print(f"Model architecture defined and weights loaded from '{os.path.basename(model_path)}'.")
        model.summary()  # Show summary after successful loading
        return model
    except Exception as load_weights_e:
        print(f"CRITICAL ERROR: Failed to load model even with specific architecture attempt: {load_weights_e}")
        print("Please provide the exact Keras architecture of your 'simple_embedded_model.h5' if this persists.")
        print("Exiting quantization process.")
        return None


def _save_tflite_model(tflite_model, file_name, label):
    """
    Write a converted TFLite model into OUTPUT_DIR and print its path and size.

    Args:
        tflite_model (bytes): Serialized model returned by the converter.
        file_name (str): File name to create inside OUTPUT_DIR.
        label (str): Quantization mode name used in the printed messages.

    Returns:
        str: Path of the file that was written.
    """
    saved_path = os.path.join(OUTPUT_DIR, file_name)
    with open(saved_path, 'wb') as f:
        f.write(tflite_model)

    size_kb = os.path.getsize(saved_path) / 1024
    print(f"{label} Quantized model saved to: {saved_path}")
    print(f"{label} Quantized model size: {size_kb:.2f} KB")
    return saved_path


def quantize_model_only(model_path):
    """
    Loads a Keras model and applies post-training dynamic range and full integer quantization,
    saving the resulting TFLite models into OUTPUT_DIR.

    If the model cannot be loaded, the errors are printed and the function
    returns without converting anything. A failure of the dynamic range
    conversion propagates to the caller; a failure of the full integer
    conversion is caught and reported with a printed message.

    Args:
        model_path (str): Path to the original .h5 model file.

    Returns:
        None
    """
    print(f"\n--- Starting Quantization for {os.path.basename(model_path)} ---")

    # 1. Load the original model (with an architecture-rebuild fallback)
    model = _load_model_with_fallback(model_path)
    if model is None:
        return

    # Input shape without the leading batch dimension, for the representative dataset
    model_input_shape = model.input_shape[1:]

    # --- 2. Apply Post-Training Dynamic Range Quantization ---
    print("\n--- Applying Post-Training Dynamic Range Quantization ---")
    converter_dr = tf.lite.TFLiteConverter.from_keras_model(model)
    # Without a representative dataset, DEFAULT selects dynamic range quantization
    converter_dr.optimizations = [tf.lite.Optimize.DEFAULT]

    tflite_model_dr = converter_dr.convert()
    _save_tflite_model(
        tflite_model_dr, 'quantized_dynamic_range_only_model.tflite', 'Dynamic Range'
    )

    # --- 3. Apply Post-Training Full Integer Quantization ---
    print("\n--- Applying Post-Training Full Integer Quantization ---")
    converter_int = tf.lite.TFLiteConverter.from_keras_model(model)
    converter_int.optimizations = [tf.lite.Optimize.DEFAULT]
    # The converter calls this to obtain a fresh generator of calibration samples
    converter_int.representative_dataset = lambda: representative_dataset_gen(model_input_shape)

    # Restrict the converter to INT8 builtin ops. With only this op set listed
    # there is NO float fallback: conversion raises an error for operations
    # that cannot be quantized, which is reported by the except clause below.
    converter_int.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    # Keep float32 tensors at the model's input/output interface so callers
    # can feed and read floats as with the original model.
    converter_int.inference_input_type = tf.float32
    converter_int.inference_output_type = tf.float32

    try:
        tflite_model_int = converter_int.convert()
        _save_tflite_model(
            tflite_model_int, 'quantized_full_integer_only_model.tflite', 'Full Integer'
        )
    except Exception as e:
        print(f"Full Integer Quantization failed. This might happen if the model operations are not fully supported for INT8 or representative dataset is insufficient. Error: {e}")

if __name__ == '__main__':
    # Prerequisite: TensorFlow must be installed (pip install tensorflow).

    # Script entry point: quantize the model at the configured path.
    quantize_model_only(SIMPLE_MODEL_PATH)


--- Starting Quantization for simple_embedded_model.h5 ---




Original model 'simple_embedded_model.h5' loaded successfully.



--- Applying Post-Training Dynamic Range Quantization ---
Saved artifact at '/tmp/tmphwmhe4up'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  139254315911312: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139254315914576: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139254315917264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139254315914192: TensorSpec(shape=(), dtype=tf.resource, name=None)
Dynamic Range Quantized model saved to: optimized_models/quantized_dynamic_range_only_model.tflite
Dynamic Range Quantized model size: 2.02 KB

--- Applying Post-Training Full Integer Quantization ---
Saved artifact at '/tmp/tmpwr2s71pt'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10), dtype=tf.float32, name='input_layer')



Full Integer Quantized model saved to: optimized_models/quantized_full_integer_only_model.tflite
Full Integer Quantized model size: 2.58 KB
