<a href="https://colab.research.google.com/github/footing11/custom-MLP-for-fashion-mnist-dataset/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import time

In [2]:
# Check if a GPU is available; otherwise, use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# We need to transform the images into tensors and normalize them.
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize((0.5,), (0.5,))  # Normalize pixel values to range [-1, 1]
])

In [4]:
# Load the training and test datasets
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:03<00:00, 6.95MB/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 133kB/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:08<00:00, 518kB/s] 


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 4.80MB/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [5]:
# Create data loaders for training and testing
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [7]:
# model define
# This is a simple MLP with one hidden layer.
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28*28, 256)  # First layer (input to hidden)
        self.fc2 = nn.Linear(256, 128)    # Second hidden layer
        self.fc3 = nn.Linear(128, 10)     # Output layer

    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten the input image
        x = torch.relu(self.fc1(x))  # Apply ReLU activation
        x = torch.relu(self.fc2(x))  # Apply ReLU activation
        x = self.fc3(x)              # Output without activation (raw scores)
        return x

In [8]:
# Define the Loss Function and Optimizer
model = MLP().to(device)
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer


In [9]:
# training
epochs = 10  # Number of epochs for training
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()  # Clear gradients for this step
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}")

Epoch [1/10], Loss: 0.4963
Epoch [2/10], Loss: 0.3692
Epoch [3/10], Loss: 0.3273
Epoch [4/10], Loss: 0.3060
Epoch [5/10], Loss: 0.2842
Epoch [6/10], Loss: 0.2689
Epoch [7/10], Loss: 0.2544
Epoch [8/10], Loss: 0.2430
Epoch [9/10], Loss: 0.2323
Epoch [10/10], Loss: 0.2204


In [10]:
# model evaluation on test data
model.eval()
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()

total = len(test_dataset)
accuracy = (correct / total) * 100
print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 87.63%


In [11]:
# Save the model weights using PyTorch's `torch.save` function.
torch.save(model.state_dict(), 'mlp_fashion_mnist_weights.pth')
print("Model weights saved.")

Model weights saved.


In [12]:
# Define basic ReLU and softmax functions manually.
def relu(x):
    return torch.maximum(torch.zeros_like(x), x)

def softmax(x):
    exp_x = torch.exp(x - torch.max(x))  # Subtract max for numerical stability
    return exp_x / exp_x.sum(dim=1, keepdim=True)

In [13]:
# Load weights into a custom MLP structure without high-level functions
custom_weights = torch.load('mlp_fashion_mnist_weights.pth', map_location='cpu')

  custom_weights = torch.load('mlp_fashion_mnist_weights.pth', map_location='cpu')


In [14]:
# Custom inference function using basic operations
def custom_inference(x):
    x = x.view(-1, 28*28)
    x = torch.matmul(x, custom_weights['fc1.weight'].T) + custom_weights['fc1.bias']
    x = relu(x)
    x = torch.matmul(x, custom_weights['fc2.weight'].T) + custom_weights['fc2.bias']
    x = relu(x)
    x = torch.matmul(x, custom_weights['fc3.weight'].T) + custom_weights['fc3.bias']
    x = softmax(x)
    return x

In [15]:
# Comparing Outputs from Both Models
# Use two random samples from the test set
import random
model.eval()
for i in range(2):
    index = random.randint(0, len(test_dataset) - 1)
    image, label = test_dataset[index]
    image = image.unsqueeze(0)  # Add batch dimension

    # Get outputs from the PyTorch model
    image = image.to('cpu')
    output_pytorch = model(image).detach()
    output_custom = custom_inference(image)

    print(f"Sample {i+1}:")
    print(f"True Label: {label}")
    print(f"PyTorch Model Output: {output_pytorch}")
    print(f"Custom Model Output: {output_custom}")

Sample 1:
True Label: 2
PyTorch Model Output: tensor([[ 0.9657, -7.1727,  4.4428, -3.4468, -0.1081, -6.8258, -0.3674, -9.2312,
         -7.3935, -4.9447]])
Custom Model Output: tensor([[2.9423e-02, 8.5946e-06, 9.5230e-01, 3.5675e-04, 1.0054e-02, 1.2159e-05,
         7.7575e-03, 1.0970e-06, 6.8916e-06, 7.9767e-05]])
Sample 2:
True Label: 6
PyTorch Model Output: tensor([[ -1.5064, -13.0206,   4.5136,  -3.9776,  -0.3133,  -9.5238,   5.2332,
          -9.3489,  -5.2601,  -8.6978]])
Custom Model Output: tensor([[7.9296e-04, 7.9189e-09, 3.2633e-01, 6.6989e-05, 2.6144e-03, 2.6139e-07,
         6.7017e-01, 3.1136e-07, 1.8579e-05, 5.9707e-07]])


In [16]:
# Calculate the Number of Parameters
# Cstom function to count the parameters of the model
def count_parameters(model):
    total_params = 0
    layer_params = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = param_count
    return total_params, layer_params

In [17]:
# Print the number of parameters
total_params, layer_params = count_parameters(model)
print(f"Total Parameters: {total_params}")
print(f"Parameters by Layer: {layer_params}")

Total Parameters: 235146
Parameters by Layer: {'fc1.weight': 200704, 'fc1.bias': 256, 'fc2.weight': 32768, 'fc2.bias': 128, 'fc3.weight': 1280, 'fc3.bias': 10}


In [18]:
# run inference on CPU and GPU and Compare Execution Times
image, _ = test_dataset[random.randint(0, len(test_dataset) - 1)]
image = image.unsqueeze(0).to(device)

In [19]:
start_time = time.time()
output_cpu = model(image.to('cpu')).detach()
cpu_time = time.time() - start_time

In [20]:
start_time = time.time()
output_gpu = model(image.to('cuda' if torch.cuda.is_available() else 'cpu')).detach()
gpu_time = time.time() - start_time

In [21]:
print(f"CPU Inference Time: {cpu_time:.6f} seconds")
print(f"GPU Inference Time: {gpu_time:.6f} seconds")

CPU Inference Time: 0.002114 seconds
GPU Inference Time: 0.002129 seconds
