Task 1.1: Environment Setup and Data Loading

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

# Seed the Python, NumPy and TensorFlow RNGs before any model is built so that
# weight initialisation and batch shuffling are repeatable across
# "Restart Kernel -> Run All" (some GPU kernels may still be non-deterministic).
SEED = 42
keras.utils.set_random_seed(SEED)

# Fashion-MNIST train/test split as returned by Keras (images + integer labels).
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Rescale pixel intensities by 1/255 and store them as float32.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# The MLP consumes the 2-D images directly (its first layer flattens them).
x_train_mlp = x_train.copy()
x_test_mlp = x_test.copy()

# The CNN needs an explicit trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
x_train_cnn = x_train[..., np.newaxis]
x_test_cnn = x_test[..., np.newaxis]

# Fail fast if the arrays do not match the input shapes the models declare.
assert x_train_mlp.shape[1:] == (28, 28), x_train_mlp.shape
assert x_train_cnn.shape[1:] == (28, 28, 1), x_train_cnn.shape
assert len(x_train) == len(y_train) and len(x_test) == len(y_test)

print("Shapes:")
print("x_train_mlp:", x_train_mlp.shape)
print("x_train_cnn:", x_train_cnn.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Shapes:
x_train_mlp: (60000, 28, 28)
x_train_cnn: (60000, 28, 28, 1)


Task 2.1: Implement and Compile the MLP Model

In [2]:
from tensorflow.keras import layers, models

# Fully-connected baseline: 28x28 image -> flatten -> 256 -> 128 -> 10-way softmax.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model is deprecated and
# emits a UserWarning.
mlp_model = models.Sequential([
    layers.Input(shape=(28, 28)),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Sparse categorical cross-entropy: the labels are passed as integer class ids,
# not one-hot vectors.
mlp_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nMLP Model Summary:")
mlp_model.summary()


MLP Model Summary:


  super().__init__(**kwargs)


Task 2.2: Implement and Compile the CNN Model

In [3]:
# Small CNN: two (3x3 conv -> 2x2 max-pool) stages followed by a dense head.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model is deprecated and
# emits a UserWarning.
cnn_model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(filters=16, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(pool_size=2),
    layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(pool_size=2),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Same optimizer, loss and metric as the MLP so the two models are comparable.
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nCNN Model Summary:")
cnn_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



CNN Model Summary:


Task 3.1: Train the MLP

In [4]:
# Training hyper-parameters for the MLP run.
EPOCHS, BATCH_SIZE = 5, 64

# validation_split=0.1 holds out 10% of the training arrays for validation;
# verbose=2 prints one summary line per epoch.
history_mlp = mlp_model.fit(
    x=x_train_mlp,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
    verbose=2,
)

Epoch 1/5
844/844 - 9s - 10ms/step - accuracy: 0.8233 - loss: 0.4971 - val_accuracy: 0.8608 - val_loss: 0.3813
Epoch 2/5
844/844 - 7s - 8ms/step - accuracy: 0.8664 - loss: 0.3651 - val_accuracy: 0.8650 - val_loss: 0.3645
Epoch 3/5
844/844 - 7s - 8ms/step - accuracy: 0.8799 - loss: 0.3266 - val_accuracy: 0.8755 - val_loss: 0.3379
Epoch 4/5
844/844 - 7s - 8ms/step - accuracy: 0.8882 - loss: 0.3033 - val_accuracy: 0.8810 - val_loss: 0.3282
Epoch 5/5
844/844 - 6s - 7ms/step - accuracy: 0.8932 - loss: 0.2868 - val_accuracy: 0.8768 - val_loss: 0.3378


Task 3.2: Train the CNN

In [5]:
# Training hyper-parameters for the CNN run (same values as the MLP run so the
# two training histories are comparable).
EPOCHS, BATCH_SIZE = 5, 64

# validation_split=0.1 holds out 10% of the training arrays for validation;
# verbose=2 prints one summary line per epoch.
history_cnn = cnn_model.fit(
    x=x_train_cnn,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
    verbose=2,
)

Epoch 1/5
844/844 - 29s - 34ms/step - accuracy: 0.7884 - loss: 0.5779 - val_accuracy: 0.8463 - val_loss: 0.4269
Epoch 2/5
844/844 - 41s - 49ms/step - accuracy: 0.8621 - loss: 0.3820 - val_accuracy: 0.8538 - val_loss: 0.3826
Epoch 3/5
844/844 - 25s - 29ms/step - accuracy: 0.8766 - loss: 0.3423 - val_accuracy: 0.8637 - val_loss: 0.3723
Epoch 4/5
844/844 - 41s - 48ms/step - accuracy: 0.8869 - loss: 0.3124 - val_accuracy: 0.8805 - val_loss: 0.3255
Epoch 5/5
844/844 - 41s - 48ms/step - accuracy: 0.8950 - loss: 0.2910 - val_accuracy: 0.8928 - val_loss: 0.2943


Task 3.3: Evaluate both models

In [6]:
# Score both models on the held-out test split. Each result is indexed below
# as [0] = loss and [1] = accuracy, matching the compiled loss and metric.
mlp_eval = mlp_model.evaluate(x=x_test_mlp, y=y_test, verbose=2)
cnn_eval = cnn_model.evaluate(x=x_test_cnn, y=y_test, verbose=2)

mlp_test_loss, mlp_test_acc = mlp_eval
cnn_test_loss, cnn_test_acc = cnn_eval

print("\nMLP - Test Loss:", mlp_test_loss, " Test Accuracy:", mlp_test_acc)
print("CNN - Test Loss:", cnn_test_loss, " Test Accuracy:", cnn_test_acc)

313/313 - 1s - 3ms/step - accuracy: 0.8676 - loss: 0.3598
313/313 - 2s - 6ms/step - accuracy: 0.8853 - loss: 0.3120

MLP - Test Loss: 0.35983437299728394  Test Accuracy: 0.8676000237464905
CNN - Test Loss: 0.3120361268520355  Test Accuracy: 0.8852999806404114


Task 4.1 & 4.2: Save models and report file sizes

In [7]:
import os
import sys

# Make sure the output directory exists; exist_ok keeps the cell re-runnable.
os.makedirs('models', exist_ok=True)

# Save each trained model as HDF5, then read back the size of the file that was
# written, converted from bytes to units of 2**20 bytes.
mlp_model.save('models/mlp_model.h5')
mlp_size = os.stat('models/mlp_model.h5').st_size / 2**20

cnn_model.save('models/cnn_model.h5')
cnn_size = os.stat('models/cnn_model.h5').st_size / 2**20

print(f"MLP model size: {mlp_size:.2f} MB")
print(f"CNN model size: {cnn_size:.2f} MB")



MLP model size: 2.72 MB
CNN model size: 0.69 MB


# Task 4.3: Estimate FLOPs and memory (approximate calculations)

In [None]:
# Parameter counts as reported by Keras for each model.
mlp_params, cnn_params = (net.count_params() for net in (mlp_model, cnn_model))

def estimate_flops_dense(in_dim, out_dim):
    """Approximate FLOPs for one forward pass through a dense layer.

    Counts one multiply and one add per weight (2 * in_dim * out_dim);
    bias additions and activation functions are ignored.
    """
    return 2 * in_dim * out_dim


def estimate_flops_conv2d(out_h, out_w, kernel_size, in_channels, out_channels):
    """Approximate FLOPs for one forward pass through a square-kernel Conv2D layer.

    Counts one multiply and one add per kernel weight at every output position;
    bias additions and activation functions are ignored.
    """
    macs_per_output = kernel_size * kernel_size * in_channels
    return 2 * macs_per_output * out_h * out_w * out_channels


# MLP: 784 (flattened 28x28) -> 256 -> 128 -> 10.
mlp_flops_inf = (
    estimate_flops_dense(784, 256)
    + estimate_flops_dense(256, 128)
    + estimate_flops_dense(128, 10)
)

# CNN feature-map sizes for a 28x28 input with unpadded 3x3 convolutions and
# 2x2 pooling: conv1 -> 26x26x16, pool -> 13x13x16, conv2 -> 11x11x32,
# pool -> 5x5x32 = 800 features feeding the dense head.
cnn_flops_inf = (
    estimate_flops_conv2d(26, 26, 3, 1, 16)
    + estimate_flops_conv2d(11, 11, 3, 16, 32)
    + estimate_flops_dense(800, 64)
    + estimate_flops_dense(64, 10)
)

# Rule-of-thumb estimate: a training step (forward + backward) is taken to cost
# about 3x one forward pass.
mlp_flops_train = mlp_flops_inf * 3
cnn_flops_train = cnn_flops_inf * 3

# Rough training-memory estimate at 16 bytes per parameter, expressed in units
# of 2**20 bytes.
# NOTE(review): 16 presumably stands for four float32 values per parameter
# (weight, gradient and Adam's two moment estimates) - confirm.
mlp_mem_mb, cnn_mem_mb = (
    n_params * 16 / (1024**2) for n_params in (mlp_params, cnn_params)
)

print(f"MLP params: {mlp_params:,}  And FLOPs inf: {mlp_flops_inf:,} And Train FLOPs: {mlp_flops_train:,} And Train Mem: {mlp_mem_mb:.2f} MB")
print(f"CNN params: {cnn_params:,}  And FLOPs inf: {cnn_flops_inf:,} And Train FLOPs: {cnn_flops_train:,} And Train Mem: {cnn_mem_mb:.2f} MB")


MLP params: 235,146  And FLOPs inf: 469,504 And Train FLOPs: 1,408,512 And Train Mem: 3.59 MB
CNN params: 56,714  And FLOPs inf: 1,413,504 And Train FLOPs: 4,240,512 And Train Mem: 0.87 MB


# Task 5.1: Summary and Conclusion

In [9]:
import pandas as pd

# One row per model, gathering the metrics computed in the earlier cells.
summary_rows = [
    ("MLP", mlp_eval[1], mlp_params, mlp_size, mlp_flops_train, mlp_mem_mb),
    ("CNN", cnn_eval[1], cnn_params, cnn_size, cnn_flops_train, cnn_mem_mb),
]
summary_df = pd.DataFrame(
    summary_rows,
    columns=[
        "Model",
        "Test Accuracy",
        "Params",
        "Model Size (MB)",
        "FLOPs (Train)",
        "Memory (Train MB)",
    ],
)

print("\nSummary DataFrame:")
display(summary_df)


Summary DataFrame:


Unnamed: 0,Model,Test Accuracy,Params,Model Size (MB),FLOPs (Train),Memory (Train MB)
0,MLP,0.8676,235146,2.721703,1408512,3.588043
1,CNN,0.8853,56714,0.687439,4240512,0.865387


The CNN performs better than the MLP on the test set (88.5% vs. 86.8% accuracy)
because its convolutional filters capture local spatial patterns in the images.
It also uses far fewer parameters (56,714 vs. 235,146) thanks to weight sharing,
making it more parameter- and memory-efficient. Although the CNN requires roughly
three times more computation (about 1.41M vs. 0.47M FLOPs per inference), it
achieves higher accuracy on this image classification task. In contrast, the MLP
is simpler but loses spatial information when the image is flattened, so it needs
more parameters and still performs slightly worse.

# **TP6**

In [None]:
import tensorflow as tf
import numpy as np
import os

# Reload the two Keras models from their HDF5 files under models/.
mlp_model, cnn_model = (
    tf.keras.models.load_model(h5_path)
    for h5_path in ('models/mlp_model.h5', 'models/cnn_model.h5')
)

# Only the training images are kept (they are used as quantization calibration
# samples); the labels and the test split are discarded.
(x_train, _), _ = tf.keras.datasets.fashion_mnist.load_data()
x_train = np.divide(x_train.astype("float32"), 255.0)

def representative_data_gen(num_samples=100, add_channel_axis=True):
    """Yield single-image calibration batches for TFLite post-training quantization.

    Reads images from the module-level ``x_train`` array. Called with no
    arguments (as a converter's ``representative_dataset``), it keeps the
    original behaviour: the first 100 images, each with a trailing channel axis.

    Args:
        num_samples: Maximum number of calibration batches to yield. Clamped to
            ``len(x_train)`` so a short dataset never produces empty batches.
        add_channel_axis: When True, append a trailing axis of size 1 to each
            batch; pass False for models whose input has no channel axis.

    Yields:
        A one-element list holding a float32 array with a leading batch
        dimension of 1.
    """
    limit = min(num_samples, len(x_train))
    for i in range(limit):
        # Slice (rather than index) to keep the leading batch dimension.
        img = x_train[i:i+1].astype(np.float32)
        if add_channel_axis:
            img = np.expand_dims(img, -1)
        yield [img]



Task 3.1: Convert & Quantize MLP Model

In [None]:
def _mlp_representative_data_gen():
    """Yield calibration batches shaped like the MLP input, i.e. (1, 28, 28).

    The MLP takes 2-D images with no channel axis, so the samples are fed
    without the trailing axis that the shared generator adds for the CNN.
    """
    for i in range(min(100, len(x_train))):
        # Slice (rather than index) to keep the leading batch dimension.
        yield [x_train[i:i+1].astype(np.float32)]


# Full-integer post-training quantization: weights and activations are
# calibrated on the representative samples, and the model's input and output
# tensors are int8 as well.
converter_mlp = tf.lite.TFLiteConverter.from_keras_model(mlp_model)
converter_mlp.optimizations = [tf.lite.Optimize.DEFAULT]
converter_mlp.representative_dataset = _mlp_representative_data_gen
converter_mlp.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter_mlp.inference_input_type = tf.int8
converter_mlp.inference_output_type = tf.int8

tflite_mlp_model = converter_mlp.convert()

with open('models/mlp_model_quantized.tflite', 'wb') as f:
    f.write(tflite_mlp_model)

Saved artifact at '/tmp/tmppdrokaqz'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  135902667560080: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667558544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667557392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667561040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667561232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667560656: TensorSpec(shape=(), dtype=tf.resource, name=None)




Task 3.2: Convert & Quantize CNN Model

In [None]:
converter_cnn = tf.lite.TFLiteConverter.from_keras_model(cnn_model)

# Calibration: default optimizations, with representative samples supplied by
# the shared generator.
converter_cnn.representative_dataset = representative_data_gen
converter_cnn.optimizations = [tf.lite.Optimize.DEFAULT]

# Restrict the converter to int8 builtin ops and make the model's input and
# output tensors int8 as well.
converter_cnn.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter_cnn.inference_input_type = converter_cnn.inference_output_type = tf.int8

tflite_cnn_model = converter_cnn.convert()

# convert() returns the serialized model; write it out as a binary file.
with open('models/cnn_model_quantized.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_cnn_model)

Saved artifact at '/tmp/tmpmzrp680d'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  135902667560848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667568720: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902614102544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902614101584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902614101008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902614100816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902667560464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135902614099472: TensorSpec(shape=(), dtype=tf.resource, name=None)





Comparing sizes before and after conversion

In [14]:
# On-disk size of each model before (HDF5) and after (quantized TFLite)
# conversion, in units of 2**20 bytes; grouped per model.
mlp_h5_size = os.stat('models/mlp_model.h5').st_size / 2**20
mlp_tflite_size = os.stat('models/mlp_model_quantized.tflite').st_size / 2**20

cnn_h5_size = os.stat('models/cnn_model.h5').st_size / 2**20
cnn_tflite_size = os.stat('models/cnn_model_quantized.tflite').st_size / 2**20

print(f"MLP Model:  Original = {mlp_h5_size:.2f} MB → Quantized = {mlp_tflite_size:.2f} MB (↓ {mlp_h5_size - mlp_tflite_size:.2f} MB smaller)")
print(f"CNN Model:  Original = {cnn_h5_size:.2f} MB → Quantized = {cnn_tflite_size:.2f} MB (↓ {cnn_h5_size - cnn_tflite_size:.2f} MB smaller)")

MLP Model:  Original = 2.72 MB → Quantized = 0.24 MB (↓ 2.48 MB smaller)
CNN Model:  Original = 0.69 MB → Quantized = 0.06 MB (↓ 0.63 MB smaller)
