
# Task 1.1: Environment Setup and Data Loading


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
import numpy as np
import os

In [None]:
# Fix the Python, NumPy and TensorFlow RNG seeds before anything stochastic runs
# (weight initialisation, batch shuffling), so that "Restart & Run All" gives
# results that are as repeatable as the backend allows.
keras.utils.set_random_seed(42)

# Load Fashion-MNIST dataset: two (images, labels) pairs, train first, test second.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
29515/29515 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
5148/5148 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4422102/4422102 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


In [None]:
# Normalize pixel values to [0, 1].
# The raw images are 8-bit integers (0-255). The dtype guard makes this cell
# idempotent: re-running it after the arrays are already float32 must not
# divide by 255 a second time and silently shrink the inputs.
if x_train.dtype == np.uint8:
    x_train = x_train.astype('float32') / 255.0
if x_test.dtype == np.uint8:
    x_test = x_test.astype('float32') / 255.0

In [None]:
# Prepare data for MLP.
# The images are deliberately kept as (N, 28, 28): flattening to 784 features
# happens inside the model (its first layer is Flatten), so no reshape is
# needed here. The previous reshape to (N, 28, 28) was a no-op.
x_train_mlp = x_train
x_test_mlp = x_test

# Guard the shape contract that the MLP's input layer relies on.
assert x_train_mlp.shape[1:] == (28, 28) and x_test_mlp.shape[1:] == (28, 28)

In [None]:
# Prepare data for CNN: append a singleton channel axis so each grayscale
# image becomes (28, 28, 1), the input shape the convolutional model declares.
x_train_cnn = np.expand_dims(x_train, axis=-1)
x_test_cnn = np.expand_dims(x_test, axis=-1)

In [None]:
# Sanity check: report the shape of every array prepared above, one per line.
print("Dataset shapes:")
print(
    *(
        f"{label}: {array.shape}"
        for label, array in (
            ("Original train images", x_train),
            ("MLP train images", x_train_mlp),
            ("CNN train images", x_train_cnn),
            ("Train labels", y_train),
            ("Test images", x_test),
            ("Test labels", y_test),
        )
    ),
    sep="\n",
)

Dataset shapes:
Original train images: (60000, 28, 28)
MLP train images: (60000, 28, 28)
CNN train images: (60000, 28, 28, 1)
Train labels: (60000,)
Test images: (10000, 28, 28)
Test labels: (10000,)


# Task 2.1: Implement and Compile the MLP Model

In [None]:
# Fully connected baseline: flatten each 28x28 image to 784 features, pass it
# through two hidden ReLU layers (256 and 128 units) and finish with a 10-way
# softmax output.
# The input shape is declared with an explicit Input object instead of
# `input_shape=` on the first layer; current Keras emits a UserWarning for the
# latter in Sequential models.
mlp_model = Sequential([
    keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

  super().__init__(**kwargs)


In [None]:
# Training configuration for the MLP: Adam optimizer, sparse categorical
# cross-entropy (the labels are passed as a 1-D array, not one-hot vectors),
# and accuracy as the reported metric.
mlp_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Banner (blank line, rule, title, rule) followed by the layer-by-layer summary.
print("\n" + "=" * 50, "MLP Model Summary:", "=" * 50, sep="\n")
mlp_model.summary()


MLP Model Summary:


# Task 2.2: Implement and Compile the CNN Model

In [None]:
# Small convolutional network: two Conv2D(3x3) + MaxPooling2D(2x2) stages
# (16 then 32 filters), followed by a 64-unit dense layer and a 10-way softmax.
# The input shape is declared with an explicit Input object instead of
# `input_shape=` on the first layer; current Keras emits a UserWarning for the
# latter in Sequential models.
cnn_model = Sequential([
    keras.Input(shape=(28, 28, 1)),
    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Same training configuration as the MLP so the two models are compared on
# equal terms: Adam, sparse categorical cross-entropy, accuracy metric.
cnn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Banner (blank line, rule, title, rule) followed by the layer-by-layer summary.
print("\n" + "=" * 50, "CNN Model Summary:", "=" * 50, sep="\n")
cnn_model.summary()


CNN Model Summary:


# Task 3.1: Train the MLP

In [None]:
# Announce the run, then train the MLP for 5 epochs with mini-batches of 64.
# NOTE: the test split is passed as validation data, so the per-epoch val_*
# metrics are computed on the same images that the evaluation cell scores.
print("\n" + "=" * 50, "Training MLP Model...", "=" * 50, sep="\n")

history_mlp = mlp_model.fit(
    x=x_train_mlp,
    y=y_train,
    epochs=5,
    batch_size=64,
    verbose=1,
    validation_data=(x_test_mlp, y_test),
)


Training MLP Model...
Epoch 1/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.7781 - loss: 0.6350 - val_accuracy: 0.8448 - val_loss: 0.4270
Epoch 2/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - accuracy: 0.8645 - loss: 0.3698 - val_accuracy: 0.8631 - val_loss: 0.3815
Epoch 3/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.8801 - loss: 0.3272 - val_accuracy: 0.8644 - val_loss: 0.3735
Epoch 4/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - accuracy: 0.8871 - loss: 0.3014 - val_accuracy: 0.8709 - val_loss: 0.3611
Epoch 5/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.8964 - loss: 0.2827 - val_accuracy: 0.8705 - val_loss: 0.3547


# Task 3.2: Train the CNN

In [None]:
# Announce the run, then train the CNN with the same schedule as the MLP
# (5 epochs, mini-batches of 64).
# NOTE: the test split is passed as validation data, so the per-epoch val_*
# metrics are computed on the same images that the evaluation cell scores.
print("\n" + "=" * 50, "Training CNN Model...", "=" * 50, sep="\n")

history_cnn = cnn_model.fit(
    x=x_train_cnn,
    y=y_train,
    epochs=5,
    batch_size=64,
    verbose=1,
    validation_data=(x_test_cnn, y_test),
)


Training CNN Model...
Epoch 1/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 18s 18ms/step - accuracy: 0.7204 - loss: 0.7823 - val_accuracy: 0.8551 - val_loss: 0.4107
Epoch 2/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 18s 19ms/step - accuracy: 0.8640 - loss: 0.3817 - val_accuracy: 0.8769 - val_loss: 0.3558
Epoch 3/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 20s 18ms/step - accuracy: 0.8835 - loss: 0.3219 - val_accuracy: 0.8807 - val_loss: 0.3304
Epoch 4/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 20s 17ms/step - accuracy: 0.8933 - loss: 0.2948 - val_accuracy: 0.8843 - val_loss: 0.3128
Epoch 5/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 17s 18ms/step - accuracy: 0.9032 - loss: 0.2681 - val_accuracy: 0.8904 - val_loss: 0.3008


# Task 3.3: Evaluate and Report

In [None]:
# Report header: blank line, rule, title, rule.
print("\n" + "=" * 50, "Model Evaluation Results:", "=" * 50, sep="\n")

# Score the MLP on the test split. evaluate() runs silently (verbose=0) and
# its result is unpacked as (loss, accuracy), matching the compiled metrics.
test_loss_mlp, test_accuracy_mlp = mlp_model.evaluate(x=x_test_mlp, y=y_test, verbose=0)
print(f"MLP Model - Test Loss: {test_loss_mlp:.4f}, Test Accuracy: {test_accuracy_mlp:.4f}")

# Score the CNN the same way, on the channel-expanded test images.
test_loss_cnn, test_accuracy_cnn = cnn_model.evaluate(x=x_test_cnn, y=y_test, verbose=0)
print(f"CNN Model - Test Loss: {test_loss_cnn:.4f}, Test Accuracy: {test_accuracy_cnn:.4f}")


Model Evaluation Results:
MLP Model - Test Loss: 0.3547, Test Accuracy: 0.8705
CNN Model - Test Loss: 0.3008, Test Accuracy: 0.8904


# Task 4.1: Count Trainable Parameters

In [None]:
def count_parameters(model):
    """Count the scalar parameters held by a model.

    Args:
        model: any object exposing ``trainable_weights`` and
            ``non_trainable_weights``, each an iterable of tensors with a
            ``shape`` attribute.

    Returns:
        tuple[int, int]: ``(trainable, non_trainable)`` element counts. Both
        are exact Python ints, including ``0`` for an empty weight list
        (``np.sum([])`` would have produced the float ``0.0``).
    """
    def _n_elements(weights):
        # Convert each tensor's element count to int before summing so the
        # total never degrades to a float, whatever the list length.
        return sum(int(np.prod(w.shape)) for w in weights)

    trainable_params = _n_elements(model.trainable_weights)
    non_trainable_params = _n_elements(model.non_trainable_weights)
    return trainable_params, non_trainable_params

# Count parameters for both networks in one pass; each call yields a
# (trainable, non_trainable) pair.
(mlp_trainable, mlp_non_trainable), (cnn_trainable, cnn_non_trainable) = (
    count_parameters(network) for network in (mlp_model, cnn_model)
)

# Only the trainable counts are reported, with thousands separators.
print(
    f"\nMLP Trainable Parameters: {mlp_trainable:,}",
    f"CNN Trainable Parameters: {cnn_trainable:,}",
    sep="\n",
)


MLP Trainable Parameters: 235,146
CNN Trainable Parameters: 56,714


# Task 4.2: Estimate Memory Footprint

In [None]:
# Persist each model to an HDF5 file and read its on-disk size back, converted
# from bytes to MB (1 MB = 1024 * 1024 bytes here).
# NOTE(review): these are full-model saves of compiled, trained models, so the
# files presumably hold more than the raw weights (e.g. optimizer state) --
# confirm before reading the sizes as a weights-only footprint.
mlp_model.save('mlp_model.h5')
mlp_size = os.path.getsize('mlp_model.h5') / 2 ** 20

cnn_model.save('cnn_model.h5')
cnn_size = os.path.getsize('cnn_model.h5') / 2 ** 20

print(f"\nMLP Model Size: {mlp_size:.2f} MB", f"CNN Model Size: {cnn_size:.2f} MB", sep="\n")




MLP Model Size: 2.72 MB
CNN Model Size: 0.69 MB


# Task 4.3: Estimate Computational Resources

In [None]:
def estimate_flops_and_memory(model, input_shape, batch_size=64):
    """
    Estimate per-sample FLOPs and parameter-related training memory.
    Note: This is a simplified estimation.

    The input shape is propagated through ``model.layers`` with each layer's
    ``compute_output_shape``. For every layer that owns weights, the first
    weight array is treated as the kernel and charged once per output position
    (all output axes between the batch axis and the last axis):

        MACs = kernel.size * prod(output_shape[1:-1])

    A Dense layer on 2-D input has a single position, while a convolution
    kernel is charged for every spatial location it is reused at. Counting
    ``2 * parameters`` instead would ignore that reuse and badly understate
    convolutional models.

    Simplifications: one multiply-accumulate is counted as 2 FLOPs; bias
    additions, activations and weight-free layers (pooling, flatten) are
    ignored. The per-position rule assumes Dense / channels-last
    convolution-style layers and is not meaningful for other weight layouts.

    Args:
        model: object exposing ``layers``; every layer must provide
            ``get_weights()`` (list of arrays) and
            ``compute_output_shape(shape)``.
        input_shape: shape of ONE sample without the batch axis,
            e.g. ``(28, 28)`` or ``(28, 28, 1)``.
        batch_size: kept for interface compatibility. All returned figures are
            per sample or batch-independent, so it is not used.

    Returns:
        tuple: ``(inference_flops, training_flops, training_memory_mb)`` where
        the FLOP counts are ints (forward pass, and 3x that for training as a
        forward + backward rule of thumb) and the memory figure is a float in
        MB.
    """
    # The batch axis is unknown / irrelevant for a per-sample estimate.
    shape = (None,) + tuple(input_shape)

    total_params = 0
    total_macs = 0
    for layer in model.layers:
        shape = tuple(layer.compute_output_shape(shape))
        weights = layer.get_weights()
        if not weights:
            continue  # pooling, flatten, ...: no parameters, FLOPs ignored

        # Count every weight array (kernel AND bias), not just the first one.
        total_params += sum(int(np.size(w)) for w in weights)

        # Number of output positions at which the kernel is applied.
        positions = int(np.prod(shape[1:-1], dtype=np.int64))
        total_macs += int(np.size(weights[0])) * positions

    inference_flops = 2 * total_macs
    training_flops = inference_flops * 3

    # Memory estimation for float32 (4 bytes per value): parameters + gradients
    # + one optimizer-state copy. This is a coarse floor: optimizers that keep
    # more than one state tensor per weight need more, and activations are not
    # included.
    param_memory = total_params * 4 / (1024 * 1024)
    training_memory = param_memory * 3

    return inference_flops, training_flops, training_memory

# Run the estimator for each network, passing the per-sample input shape
# (no batch axis); each call returns (inference FLOPs, training FLOPs, MB).
mlp_inf_flops, mlp_train_flops, mlp_train_mem = estimate_flops_and_memory(model=mlp_model, input_shape=(28, 28))
cnn_inf_flops, cnn_train_flops, cnn_train_mem = estimate_flops_and_memory(model=cnn_model, input_shape=(28, 28, 1))

# One report line per model, preceded by a blank line and a heading.
print("\nComputational Resources Estimation:")
print(
    *(
        f"{tag} - Inference FLOPs: {inf:,.0f}, Training FLOPs: {train:,.0f}, Training Memory: {mem:.2f} MB"
        for tag, inf, train, mem in (
            ("MLP", mlp_inf_flops, mlp_train_flops, mlp_train_mem),
            ("CNN", cnn_inf_flops, cnn_train_flops, cnn_train_mem),
        )
    ),
    sep="\n",
)


Computational Resources Estimation:
MLP - Inference FLOPs: 469,504, Training FLOPs: 1,408,512, Training Memory: 2.69 MB
CNN - Inference FLOPs: 113,184, Training FLOPs: 339,552, Training Memory: 0.65 MB


# Task 5.1: Final Report and Conclusion

In [None]:
print("\n" + "="*80)
print("FINAL COMPARISON REPORT")
print("="*80)

# Create comparison table: a header row followed by one pre-formatted row per model.
comparison_data = [
    ["Model", "Test Accuracy", "Trainable Parameters", "Model Size (MB)", "FLOPs (Training)", "FLOPs (Inference)", "Training Memory (MB)"],
    ["MLP", f"{test_accuracy_mlp:.4f}", f"{mlp_trainable:,}", f"{mlp_size:.2f}", f"{mlp_train_flops:,.0f}", f"{mlp_inf_flops:,.0f}", f"{mlp_train_mem:.2f}"],
    ["CNN", f"{test_accuracy_cnn:.4f}", f"{cnn_trainable:,}", f"{cnn_size:.2f}", f"{cnn_train_flops:,.0f}", f"{cnn_inf_flops:,.0f}", f"{cnn_train_mem:.2f}"]
]

# Print formatted table.
# Each column is sized to its widest cell (header included). Fixed widths of
# 15/20 characters were shorter than several headers, which pushed the data
# rows out of alignment with the header row.
column_widths = [max(len(cell) for cell in column) for column in zip(*comparison_data)]
for row in comparison_data:
    print("  ".join(cell.ljust(width) for cell, width in zip(row, column_widths)).rstrip())

print("\n" + "="*80)
print("ANALYSIS AND CONCLUSIONS")
print("="*80)

# Answer the questions.
# Both comparisons handle a tie explicitly; a plain if/else would report the
# MLP as strictly "higher" / "fewer" whenever the two values are equal.
print("\n1. Which model achieved a higher accuracy?")
if test_accuracy_cnn > test_accuracy_mlp:
    print(f"   ✓ CNN model achieved higher accuracy ({test_accuracy_cnn:.4f} vs {test_accuracy_mlp:.4f})")
elif test_accuracy_mlp > test_accuracy_cnn:
    print(f"   ✓ MLP model achieved higher accuracy ({test_accuracy_mlp:.4f} vs {test_accuracy_cnn:.4f})")
else:
    print(f"   ✓ Both models achieved the same accuracy ({test_accuracy_mlp:.4f})")

print("\n2. Which model had a smaller number of parameters (lower memory footprint)?")
if cnn_trainable < mlp_trainable:
    print(f"   ✓ CNN model has fewer parameters ({cnn_trainable:,} vs {mlp_trainable:,})")
elif mlp_trainable < cnn_trainable:
    print(f"   ✓ MLP model has fewer parameters ({mlp_trainable:,} vs {cnn_trainable:,})")
else:
    print(f"   ✓ Both models have the same number of parameters ({mlp_trainable:,})")

# Question 3: fixed narrative text, emitted one entry per line.
print(
    *[
        "\n3. Explain the trade-off between the two models:",
        "   ✓ CNN Advantages:",
        "     - Better at capturing spatial hierarchies and local patterns",
        "     - Parameter sharing reduces overfitting",
        "     - Translation invariance (recognizes patterns regardless of position)",
        "     - Generally superior for image classification tasks",
        "   ✓ MLP Advantages:",
        "     - Simpler architecture, easier to implement",
        "     - May train faster on some hardware",
        "     - Can work well for simpler image tasks",
    ],
    sep="\n",
)

# Question 4: fixed narrative text, emitted one entry per line.
print(
    *[
        "\n4. Why is CNN generally superior for image tasks?",
        "   ✓ Convolutional layers preserve spatial relationships",
        "   ✓ Hierarchical feature learning (edges → patterns → objects)",
        "   ✓ Parameter efficiency through weight sharing",
        "   ✓ Built-in translation invariance",
        "   ✓ Better generalization with fewer parameters",
    ],
    sep="\n",
)


FINAL COMPARISON REPORT
Model           Test Accuracy   Trainable Parameters Model Size (MB) FLOPs (Training) FLOPs (Inference) Training Memory (MB)
MLP             0.8705          235,146              2.72            1,408,512       469,504         2.69           
CNN             0.8904          56,714               0.69            339,552         113,184         0.65           

ANALYSIS AND CONCLUSIONS

1. Which model achieved a higher accuracy?
   ✓ CNN model achieved higher accuracy (0.8904 vs 0.8705)

2. Which model had a smaller number of parameters (lower memory footprint)?
   ✓ CNN model has fewer parameters (56,714 vs 235,146)

3. Explain the trade-off between the two models:
   ✓ CNN Advantages:
     - Better at capturing spatial hierarchies and local patterns
     - Parameter sharing reduces overfitting
     - Translation invariance (recognizes patterns regardless of position)
     - Generally superior for image classification tasks
   ✓ MLP Advantages:
     - Simpler arch