<a href="https://colab.research.google.com/github/amelft81/EmbeddedAI/blob/main/ModelForBenchmarking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import os
import numpy as np # For dummy data, replace with your actual data loading
from sklearn.model_selection import train_test_split # For splitting actual data
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score # For comprehensive evaluation

# --- Configuration ---
# Path to the Keras .h5 model that should be optimized.
SIMPLE_MODEL_PATH = 'simple_embedded_model.h5' # Ensure this path is correct

# Output directory for optimized models
OUTPUT_DIR = 'optimized_models'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# One seed for data generation, data splitting and TensorFlow, so that a run
# from a fresh kernel draws the same dummy data and the same split.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# --- Load your actual dataset ---
# IMPORTANT: Replace this with your actual data loading and preprocessing.
# The dummy data below has input_shape=(10,) and 2 output classes.
# Adjust X_actual and y_actual to match your model's input/output.
print("Loading/Generating actual dataset (replace with your real data)...")
num_samples_actual = 5000 # Number of dummy samples to generate
input_shape_for_model = (10,) # Per-sample feature shape fed to the model
num_output_classes = 2 # Example: for binary classification or 2 output units

# Dummy data: uniform float32 features and uniformly drawn integer labels.
X_actual = np.random.rand(num_samples_actual, *input_shape_for_model).astype(np.float32)
y_actual = np.random.randint(0, num_output_classes, num_samples_actual)

# First split: hold out 20% of all samples as the test set.
X_train_actual, X_test_actual, y_train_actual, y_test_actual = train_test_split(
    X_actual,
    y_actual,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=y_actual if num_output_classes > 1 else None,
)
# Second split: 25% of the remaining 80% becomes validation,
# giving 60% train / 20% validation / 20% test overall.
X_train_actual, X_val_actual, y_train_actual, y_val_actual = train_test_split(
    X_train_actual,
    y_train_actual,
    test_size=0.25,
    random_state=RANDOM_SEED,
    stratify=y_train_actual if num_output_classes > 1 else None,
)

print(f"Actual training data shape: X={X_train_actual.shape}, y={y_train_actual.shape}")
print(f"Actual validation data shape: X={X_val_actual.shape}, y={y_val_actual.shape}")
print(f"Actual test data shape: X={X_test_actual.shape}, y={y_test_actual.shape}")

# --- Helper function to get model size (already in your notebook) ---
def get_gzipped_model_size(file_path):
    """Return the size in bytes of ``file_path`` after DEFLATE compression.

    The file is written as the only member of a temporary zip archive, the
    archive's on-disk size is measured, and the archive is then deleted.

    Args:
        file_path: Path to an existing file (for example a saved model).

    Returns:
        int: Size of the temporary zip archive in bytes.

    Raises:
        OSError: If ``file_path`` cannot be read or the temporary archive
            cannot be written.
    """
    import zipfile, tempfile
    # mkstemp returns an already-open OS-level descriptor. Close it right away
    # so it is not leaked; ZipFile reopens the file by path.
    fd, zipped_file = tempfile.mkstemp('.zip')
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file_path, os.path.basename(file_path))
        return os.path.getsize(zipped_file)
    finally:
        # Remove the scratch archive so repeated calls do not pile up temp files.
        os.remove(zipped_file)

# --- 1. Load the Baseline Model ---
# Try to load the saved Keras model. If that fails for any reason, fall back
# to a small freshly built (untrained) stand-in so the rest of the notebook
# can still run.
print(f"\n--- Loading Baseline Model from: {SIMPLE_MODEL_PATH} ---")
try:
    baseline_model = tf.keras.models.load_model(SIMPLE_MODEL_PATH)
    print(f"Original model '{os.path.basename(SIMPLE_MODEL_PATH)}' loaded successfully.")
    baseline_model.summary()
    baseline_model_size = os.path.getsize(SIMPLE_MODEL_PATH)
    print(f"Baseline model size (raw): {baseline_model_size / 1024:.2f} KB")
except Exception as e:
    # Deliberately broad: this is a best-effort fallback, and the error is
    # reported rather than swallowed.
    print(f"Error loading baseline model: {e}")
    print("Attempting to define a common simple model architecture for 'simple_embedded_model.h5'...")

    # Pair the output activation with the loss. Softmax over a single unit
    # always yields 1.0, so the single-unit (binary) case must use sigmoid.
    if num_output_classes > 1:
        output_activation = 'softmax'
        loss_name = 'sparse_categorical_crossentropy'
    else: # Binary classification with one output unit
        output_activation = 'sigmoid'
        loss_name = 'binary_crossentropy'

    # Stand-in architecture; adjust if your actual model differs.
    baseline_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape_for_model),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(num_output_classes, activation=output_activation)
    ])
    baseline_model.compile(optimizer='adam', loss=loss_name, metrics=['accuracy'])

    # Save the stand-in so its on-disk size can be reported like a real baseline.
    dummy_baseline_path = os.path.join(OUTPUT_DIR, 'dummy_baseline_model.h5')
    baseline_model.save(dummy_baseline_path)
    baseline_model_size = os.path.getsize(dummy_baseline_path)
    print(f"Dummy Baseline model created and saved. Size (raw): {baseline_model_size / 1024:.2f} KB")


# --- 2. Quantization-Aware Training (QAT) ---
# Steps: wrap the baseline model for QAT, train it, evaluate it in Keras,
# convert it to TFLite, then re-check accuracy through the TFLite interpreter.

print("\n--- Applying Quantization-Aware Training (QAT) ---")

# Apply quantization-aware training wrappers to the baseline model
quant_aware_model = tfmot.quantization.keras.quantize_model(baseline_model)

# Compile the QAT model
# Use the same optimizer, loss, and metrics as your baseline model's training
if num_output_classes > 1:
    quant_aware_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
else: # Binary classification
    quant_aware_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print("Quantization-aware model compiled.")
quant_aware_model.summary()

# Train the quantization-aware model on the training split
print("\nTraining quantization-aware model (using actual data)...")
epochs_qat = 10 # Upper bound; EarlyStopping below may stop sooner
batch_size_qat = 32 # Adjust batch size based on your data and memory

# Stop when validation loss stalls for 3 epochs (restoring the best weights)
# and log the run for TensorBoard.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    tf.keras.callbacks.TensorBoard(log_dir=os.path.join(OUTPUT_DIR, 'logs/qat'))
]

quant_aware_model.fit(
    X_train_actual,
    y_train_actual,
    epochs=epochs_qat,
    batch_size=batch_size_qat,
    validation_data=(X_val_actual, y_val_actual),
    callbacks=callbacks,
    verbose=1
)
print("Quantization-aware model training complete.")

# Evaluate the QAT model on the test set
print("\nEvaluating Quantization-Aware Trained model on test set...")
qat_loss, qat_accuracy = quant_aware_model.evaluate(X_test_actual, y_test_actual, verbose=0)
print(f"QAT Model Test Loss: {qat_loss:.4f}")
print(f"QAT Model Test Accuracy: {qat_accuracy:.4f}")

# Convert QAT model to TFLite for deployment
print("\nConverting QAT model to TFLite...")
converter_qat = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter_qat.optimizations = [tf.lite.Optimize.DEFAULT]

# For full integer quantization from QAT, you typically don't need a representative dataset,
# as the quantization parameters are learned during training.
# However, if you want a fully integer model where input/output are also integer (for specific hardware),
# you might specify:
# converter_qat.inference_input_type = tf.int8
# converter_qat.inference_output_type = tf.int8
# And then provide a representative dataset for input/output calibration if needed.
# For general embedded use, float32 input/output with int8 internal ops is common.
converter_qat.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter_qat.inference_input_type = tf.float32 # Keep float input for ease of use
converter_qat.inference_output_type = tf.float32 # Keep float output

try:
    tflite_qat_model = converter_qat.convert()
    qat_tflite_path = os.path.join(OUTPUT_DIR, 'qat_tflite_model_enhanced.tflite')
    with open(qat_tflite_path, 'wb') as f:
        f.write(tflite_qat_model)

    qat_tflite_size = os.path.getsize(qat_tflite_path)
    print(f"Quantization-Aware Trained TFLite model saved to: {qat_tflite_path}")
    print(f"Quantization-Aware Trained TFLite model size (raw): {qat_tflite_size / 1024:.2f} KB")
    print(f"Quantization-Aware Trained TFLite model size (gzipped): {get_gzipped_model_size(qat_tflite_path) / 1024:.2f} KB")

    # Optional: Test the TFLite model for end-to-end accuracy
    print("\nVerifying TFLite QAT model accuracy...")
    interpreter = tf.lite.Interpreter(model_content=tflite_qat_model)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Run the interpreter one sample at a time (each slice is a batch of one).
    predictions = []
    for i in range(len(X_test_actual)):
        input_data = X_test_actual[i:i+1].astype(input_details[0]['dtype'])
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predictions.append(output_data)

    # Join the per-sample outputs along the batch axis. Unlike squeeze(), this
    # never drops the sample axis, so a one-sample test set still yields a 2-D
    # array that argmax(axis=1) can handle.
    predictions = np.concatenate(predictions, axis=0)

    # Convert predictions to class labels if applicable
    if num_output_classes > 1:
        predicted_labels = np.argmax(predictions, axis=1)
    else: # Binary classification, assuming sigmoid output
        # Flatten the single output column so labels are 1-D like y_test_actual.
        predicted_labels = (predictions.ravel() > 0.5).astype(int)

    tflite_accuracy = accuracy_score(y_test_actual, predicted_labels)
    print(f"TFLite QAT Model Accuracy on test set: {tflite_accuracy:.4f}")
    if num_output_classes > 1:
        tflite_f1 = f1_score(y_test_actual, predicted_labels, average='weighted')
        tflite_precision = precision_score(y_test_actual, predicted_labels, average='weighted')
        tflite_recall = recall_score(y_test_actual, predicted_labels, average='weighted')
    else: # Binary classification
        tflite_f1 = f1_score(y_test_actual, predicted_labels)
        tflite_precision = precision_score(y_test_actual, predicted_labels)
        tflite_recall = recall_score(y_test_actual, predicted_labels)

    print(f"TFLite QAT Model F1-score: {tflite_f1:.4f}")
    print(f"TFLite QAT Model Precision: {tflite_precision:.4f}")
    print(f"TFLite QAT Model Recall: {tflite_recall:.4f}")

except Exception as e:
    # Best-effort: report conversion or evaluation failures instead of aborting the cell.
    print(f"QAT TFLite conversion or evaluation failed: {e}")
    print("Ensure your model architecture is supported for INT8 quantization and your representative dataset is accurate.")

# You would also perform similar detailed evaluation for your baseline and post-training quantized models
# to enable a comparative analysis.

Loading/Generating actual dataset (replace with your real data)...
Actual training data shape: X=(3000, 10), y=(3000,)
Actual validation data shape: X=(1000, 10), y=(1000,)
Actual test data shape: X=(1000, 10), y=(1000,)

--- Loading Baseline Model from: simple_embedded_model.h5 ---
Error loading baseline model: No file or directory found at simple_embedded_model.h5
Attempting to define a common simple model architecture for 'simple_embedded_model.h5'...


  saving_api.save_model(


Dummy Baseline model created and saved. Size (raw): 16.28 KB

--- Applying Quantization-Aware Training (QAT) ---
Quantization-aware model compiled.
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer (QuantizeLa  (None, 10)                3         
 yer)                                                            
                                                                 
 quant_dense (QuantizeWrapp  (None, 8)                 93        
 erV2)                                                           
                                                                 
 quant_dense_1 (QuantizeWra  (None, 2)                 23        
 pperV2)                                                         
                                                                 
Total params: 119 (476.00 Byte)
Trainable params: 106 (424.00 Byte)
Non-trainable params: 13 (52.00 Byte)


In [2]:
!pip install -U tensorflow-model-optimization

Collecting tensorflow-model-optimization
  Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl.metadata (904 bytes)
Collecting numpy~=1.23 (from tensorflow-model-optimization)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m80.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, tensorflow-model-optimization
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling n