In [1]:
!pip install -q tensorflow

In [2]:
import tensorflow as tf
import os

mlp_model = tf.keras.models.load_model('/content/mlp_model.h5')
cnn_model = tf.keras.models.load_model('/content/cnn_model.h5')

def get_file_size_mb(path):
    return os.path.getsize(path) / (1024*1024)

mlp_size = get_file_size_mb('/content/mlp_model.h5')
cnn_size = get_file_size_mb('/content/cnn_model.h5')

print(f"MLP Keras model size: {mlp_size:.2f} MB")
print(f"CNN Keras model size: {cnn_size:.2f} MB")




MLP Keras model size: 2.72 MB
CNN Keras model size: 0.69 MB


In [3]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist

(x_train, _), _ = fashion_mnist.load_data()
x_train = x_train.astype("float32") / 255.0

def representative_dataset_gen():
    for i in range(100):
        sample = x_train[i:i+1]
        yield [sample.reshape(1,28,28)]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:

mlp_converter = tf.lite.TFLiteConverter.from_keras_model(mlp_model)

mlp_converter.optimizations = [tf.lite.Optimize.DEFAULT]
mlp_converter.representative_dataset = representative_dataset_gen
mlp_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
mlp_converter.inference_input_type = tf.uint8
mlp_converter.inference_output_type = tf.uint8

mlp_tflite_model = mlp_converter.convert()

with open('mlp_model_quant.tflite', 'wb') as f:
    f.write(mlp_tflite_model)

mlp_tflite_size = get_file_size_mb('mlp_model_quant.tflite')
print(f"MLP TFLite quantized model size: {mlp_tflite_size:.2f} MB")
print(f"Size reduction: {mlp_size - mlp_tflite_size:.2f} MB")


Saved artifact at '/tmp/tmpth1mexnf'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 784), dtype=tf.float32, name='input_layer_6')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  132892713790416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892713790992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892713791760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892713789648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892713792336: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892713792528: TensorSpec(shape=(), dtype=tf.resource, name=None)
MLP TFLite quantized model size: 0.24 MB
Size reduction: 2.48 MB




In [5]:
def representative_dataset_gen_cnn():
    for i in range(100):
        sample = x_train[i:i+1].reshape(1,28,28,1)
        yield [sample]

cnn_converter = tf.lite.TFLiteConverter.from_keras_model(cnn_model)
cnn_converter.optimizations = [tf.lite.Optimize.DEFAULT]
cnn_converter.representative_dataset = representative_dataset_gen_cnn
cnn_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
cnn_converter.inference_input_type = tf.uint8
cnn_converter.inference_output_type = tf.uint8

cnn_tflite_model = cnn_converter.convert()

with open('cnn_model_quant.tflite', 'wb') as f:
    f.write(cnn_tflite_model)

cnn_tflite_size = get_file_size_mb('cnn_model_quant.tflite')
print(f"CNN TFLite quantized model size: {cnn_tflite_size:.2f} MB")
print(f"Size reduction: {cnn_size - cnn_tflite_size:.2f} MB")


Saved artifact at '/tmp/tmpeqpaocdb'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name='input_layer_5')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  132892713795024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711011152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711010768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711013072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711014032: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711013264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711013456: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132892711014608: TensorSpec(shape=(), dtype=tf.resource, name=None)




CNN TFLite quantized model size: 0.06 MB
Size reduction: 0.63 MB


In [6]:
import pandas as pd

data = {
    "Model": ["MLP", "CNN"],
    "Keras Size (Float32, MB)": [mlp_size, cnn_size],
    "Quantized TFLite Size (int8, MB)": [mlp_tflite_size, cnn_tflite_size],
    "SRAM Constraint (XIAO, MB)": [0.5, 0.5],  # 512 KB = 0.5 MB
    "Can Fit in SRAM?": ["Yes" if mlp_tflite_size < 0.5 else "No",
                         "Yes" if cnn_tflite_size < 0.5 else "No"]
}

df = pd.DataFrame(data)
print(df)


  Model  Keras Size (Float32, MB)  Quantized TFLite Size (int8, MB)  \
0   MLP                  2.721336                          0.237106   
1   CNN                  0.687439                          0.061279   

   SRAM Constraint (XIAO, MB) Can Fit in SRAM?  
0                         0.5              Yes  
1                         0.5              Yes  


In [7]:
print("=== Deployment Feasibility Analysis ===\n")

for model_name, size_mb in zip(["MLP","CNN"], [mlp_tflite_size, cnn_tflite_size]):
    fits = "YES" if size_mb < 0.5 else "NO"
    print(f"{model_name} quantized model size: {size_mb:.3f} MB -> Fits in SRAM? {fits}")

print("\nConclusion:")
print("- Given the dual-core 240 MHz CPU, and the small model sizes:")
print("  * CNN and MLP models could technically run on XIAO ESP32S3.")
print("  * MLP quantized model (~<0.5 MB) easily fits in SRAM for inference.")
print("  * CNN model may be close to SRAM limit depending on batch/activation memory; careful optimization required.")
print("  * Inference should be feasible in tens of milliseconds per image, likely <100ms per sample.")


=== Deployment Feasibility Analysis ===

MLP quantized model size: 0.237 MB -> Fits in SRAM? YES
CNN quantized model size: 0.061 MB -> Fits in SRAM? YES

Conclusion:
- Given the dual-core 240 MHz CPU, and the small model sizes:
  * CNN and MLP models could technically run on XIAO ESP32S3.
  * MLP quantized model (~<0.5 MB) easily fits in SRAM for inference.
  * CNN model may be close to SRAM limit depending on batch/activation memory; careful optimization required.
  * Inference should be feasible in tens of milliseconds per image, likely <100ms per sample.
