In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import datetime
import numpy as np
import os
import pandas as pd

In [7]:
# Suppress TensorFlow logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

print(f"Using TensorFlow version: {tf.__version__}\n")

Using TensorFlow version: 2.19.0



## 1. Environment Setup and Data Loading

### 1.1. Load the Fashion-MNIST dataset

In [8]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

### 1.2. Normalize the image data to [0, 1]

In [9]:
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

### 1.3. Reshape data for the CNN (N, 28, 28, 1)

We add the 'channels' dimension

In [10]:
x_train_cnn = np.expand_dims(x_train, -1)
x_test_cnn = np.expand_dims(x_test, -1)

In [11]:
print(f"Original x_train shape: {x_train.shape}")
print(f"Data shape for MLP (no change needed, Flatten layer handles): {x_train.shape}")
print(f"Data shape for CNN (with channels dim): {x_train_cnn.shape}\n")

Original x_train shape: (60000, 28, 28)
Data shape for MLP (no change needed, Flatten layer handles): (60000, 28, 28)
Data shape for CNN (with channels dim): (60000, 28, 28, 1)



## 2. Model Implementation

### 2.1 MLP model
#### 2.1.1 MLP model implementaion

In [12]:
mlp_model = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
], name="MLP_Model")

  super().__init__(**kwargs)


### 2.1.2 Compile the MLP model

In [13]:
mlp_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

### 2.1.3 MLP model summary

In [14]:
mlp_model.summary()

### 2.2 CNN model
#### 2.2.1 CNN model implementaion

In [15]:
cnn_model = keras.Sequential([
    # Convolutional Block 1
    layers.Conv2D(16, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=2),

    # Convolutional Block 2
    layers.Conv2D(32, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(pool_size=2),

    # Classifier
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
], name="CNN_Model")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


#### 2.2.2 Compile the CNN model

In [16]:
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

#### 2.2.3 The CNN model summary

In [17]:
cnn_model.summary()

###

## 3. Training and Evaluation

### 3.1 Train MLP model

In [18]:
mlp_history = mlp_model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=64,
    verbose=1
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.7848 - loss: 0.6196
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.8674 - loss: 0.3662
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.8763 - loss: 0.3358
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.8877 - loss: 0.3019
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.8966 - loss: 0.2812


### 3.1 Train CNN model

In [19]:
cnn_history = cnn_model.fit(
    x_train_cnn, y_train,
    epochs=5,
    batch_size=64,
    verbose=1
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 30ms/step - accuracy: 0.7195 - loss: 0.7840
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 30ms/step - accuracy: 0.8691 - loss: 0.3638
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 30ms/step - accuracy: 0.8849 - loss: 0.3188
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 30ms/step - accuracy: 0.8944 - loss: 0.2905
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 31ms/step - accuracy: 0.9053 - loss: 0.2621


### 3.3. Evaluate and Report

#### 3.3.1 MLP model

In [20]:
mlp_loss, mlp_acc = mlp_model.evaluate(x_test, y_test, verbose=0)
print(f"MLP Model - Test Loss:     {mlp_loss:.4f}")
print(f"MLP Model - Test Accuracy: {mlp_acc * 100:.2f}%")

MLP Model - Test Loss:     0.3410
MLP Model - Test Accuracy: 87.61%


#### 3.3.2 CNN model

In [21]:
cnn_loss, cnn_acc = cnn_model.evaluate(x_test_cnn, y_test, verbose=0)
print(f"CNN Model - Test Loss:     {cnn_loss:.4f}")
print(f"CNN Model - Test Accuracy: {cnn_acc * 100:.2f}%")

CNN Model - Test Loss:     0.2908
CNN Model - Test Accuracy: 89.24%


## 4. Resource Usage Comparison

In [22]:
mlp_params = mlp_model.count_params()
cnn_params = cnn_model.count_params()

print(f"MLP Trainable Parameters: {mlp_params}")
print(f"CNN Trainable Parameters: {cnn_params}")

MLP Trainable Parameters: 235146
CNN Trainable Parameters: 56714


### 4.1 Save the models

In [23]:
mlp_model.save('mlp_model.keras')
cnn_model.save('cnn_model.keras')

### 4.2 Get file sizes in MB

In [24]:
mlp_size_mb = os.path.getsize('mlp_model.h5') / (1024 * 1024)
cnn_size_mb = os.path.getsize('cnn_model.h5') / (1024 * 1024)

print(f"MLP Saved Model Size: {mlp_size_mb:.2f} MB")
print(f"CNN Saved Model Size: {cnn_size_mb:.2f} MB")

MLP Saved Model Size: 2.72 MB
CNN Saved Model Size: 0.69 MB


### 4.3. Estimate Computational Resources (FLOPs & Training Memory)

Calculating exact FLOPs and memory is complex.
we gonna use other library to calucalte the FLOPS

#### 4.3.1. FLOPs (Floating-point operations)

In [26]:
def safe_count_flops(model, input_shape):
    """
    Counts FLOPs safely across TensorFlow versions.
    Uses built-in Model.count_flops() if available (TF ≥ 2.16),
    otherwise falls back to a profiler-based method (TF ≤ 2.15).
    """
    # --- Modern API path (TensorFlow 2.16+ / Keras 3.x) ---
    if hasattr(model, "count_flops"):
        flops = model.count_flops(
            input_signature=(tf.TensorSpec(shape=(1, *input_shape), dtype=tf.float32),)
        )
        return flops

    # --- Backward-compatible path ---
    from tensorflow.python.profiler import model_analyzer, option_builder
    from tensorflow.python.framework import convert_to_constants

    concrete_func = tf.function(model).get_concrete_function(
        tf.TensorSpec([1, *input_shape], tf.float32)
    )
    frozen_func = convert_to_constants.convert_variables_to_constants_v2(concrete_func)

    run_meta = tf.compat.v1.RunMetadata()
    opts = option_builder.ProfileOptionBuilder.float_operation()

    flops = tf.compat.v1.profiler.profile(
        graph=frozen_func.graph,
        run_meta=run_meta,
        cmd='op',
        options=opts
    )
    return flops.total_float_ops


# --- Use it on your models ---
mlp_flops = safe_count_flops(mlp_model, input_shape=(28, 28))
cnn_flops = safe_count_flops(cnn_model, input_shape=(28, 28, 1))

print(f"MLP Model FLOPs: {mlp_flops / 1e6:.2f} M-FLOPs")
print(f"CNN Model FLOPs: {cnn_flops / 1e6:.2f} M-FLOPs")

Instructions for updating:
This API was designed for TensorFlow v1. See https://www.tensorflow.org/guide/migrate for instructions on how to migrate your code to TensorFlow v2.


MLP Model FLOPs: 0.47 M-FLOPs
CNN Model FLOPs: 1.44 M-FLOPs


#### 4.3.2. Training Memory

Training memory ≈ (Model Parameters + Gradients + Optimizer State) + Activations
- Model Parameters: Size of weights (e.g., mlp_params \* 4 bytes for float32)
- Gradients: Same size as parameters.
- Optimizer State: For Adam, stores 2 states (m, v) per param. (2 \* param_size)
- Activations: Depends on batch_size. (batch_size \* activation_size_per_layer)

Rough Estimate (Model-related memory, excluding activations):
Total = Params + Gradients + Optimizer(m) + Optimizer(v) = 4 \* Param_size

In [27]:
mlp_train_mem_mb = (mlp_params * 4 * 4) / (1024 * 1024) # 4x params, 4 bytes/param
cnn_train_mem_mb = (cnn_params * 4 * 4) / (1024 * 1024) # 4x params, 4 bytes/param
print(f"Estimated MLP Training Memory (Params + Grads + Adam): {mlp_train_mem_mb:.2f} MB")
print(f"Estimated CNN Training Memory (Params + Grads + Adam): {cnn_train_mem_mb:.2f} MB")
print("(This excludes memory for batch activations, which can be significant)")

Estimated MLP Training Memory (Params + Grads + Adam): 3.59 MB
Estimated CNN Training Memory (Params + Grads + Adam): 0.87 MB
(This excludes memory for batch activations, which can be significant)


## 5. Final Report and Conclusion

### 5.1. Write the Conclusion

create a model summary

In [30]:
summary_data = {
    "Model": ["MLP", "CNN"],
    "Test Accuracy": [f"{mlp_acc*100:.2f}%", f"{cnn_acc*100:.2f}%"],
    "Trainable Parameters": [mlp_params, cnn_params],
    "Saved Model Size (MB)": [f"{mlp_size_mb:.2f}", f"{cnn_size_mb:.2f}"],
    "FLOPs (Inference)": ["0.47 M-FLOPs", "1.44 M-FLOPs"],
    "FLOPs (Training)": ["~1.4 MFLOPs (Est.)", "~4.24 MFLOPs (Est.)"],
    "Training Memory (MB)": [f"~{mlp_train_mem_mb:.2f} + Activations", f"~{cnn_train_mem_mb:.2f} + Activations"]
}

summary_table = pd.DataFrame(summary_data)
print(summary_table.to_markdown(index=False, numalign="center", stralign="center"))

|  Model  |  Test Accuracy  |  Trainable Parameters  |  Saved Model Size (MB)  |  FLOPs (Inference)  |  FLOPs (Training)   |  Training Memory (MB)  |
|:-------:|:---------------:|:----------------------:|:-----------------------:|:-------------------:|:-------------------:|:----------------------:|
|   MLP   |     87.61%      |         235146         |          2.72           |    0.47 M-FLOPs     | ~1.4 MFLOPs (Est.)  |  ~3.59 + Activations   |
|   CNN   |     89.24%      |         56714          |          0.69           |    1.44 M-FLOPs     | ~4.24 MFLOPs (Est.) |  ~0.87 + Activations   |


|  Model  |  Test Accuracy  |  Trainable Parameters  |  Saved Model Size (MB)  |  FLOPs (Inference)  |  FLOPs (Training)  |  Training Memory (MB)  |
|:-------:|:---------------:|:----------------------:|:-----------------------:|:-------------------:|:------------------:|:----------------------:|
|   MLP   |     88.17%      |         235146         |          2.72           | ~470 kFLOPs (Est.)  | ~1.4 MFLOPs (Est.) |  ~3.59 + Activations   |
|   CNN   |     88.98%      |         56714          |          0.69           | ~1.2 MFLOPs (Est.)  | ~3.6 MFLOPs (Est.) |  ~0.87 + Activations   |

## Conclusion Questions

### 1. Which model achieved a higher accuracy?

Answer: The **CNN model** achieved higher accuracy (88.98%) compared to the MLP model (88.17%).

### 2. Which model had a smaller number of parameters (lower memory footprint)?
Answer: The **CNN model** had significantly fewer parameters (56714) than the MLP model (235146). This also resulted in a smaller saved file size.


### 3. Explain the trade-off between the two models...

The **CNN model** (the "winner" in accuracy) is superior for image tasks because it uses **convolutional layers**. These layers are specifically designed to find spatial patterns (like edges, textures, and shapes) in the image. They also use **parameter sharing**, where the same filter (kernel) is slid across the entire image. This makes the model:
   1.  **More effective:** It learns features that are *translationally invariant* (it can find a "shoe" anywhere in the image).
   2.  **More efficient:** It needs far fewer parameters than an MLP, which connects *every* input pixel to *every* neuron in the first hidden layer.


The **MLP model**'s main disadvantage is that it's "fully connected." It treats the $28 \times 28$ image as a flat $784$-element vector and loses all spatial information. It doesn't know which pixels are next to each other. Its only "advantage" in this context is its conceptual simplicity, but it is a poor choice for image data, as shown by its lower accuracy and massive parameter count.


**In summary:** The CNN achieves higher accuracy with *fewer* parameters because it is an architecture fundamentally suited for spatial data like images.