In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


In [3]:
train_dataset = datasets.FashionMNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform
)

test_dataset = datasets.FashionMNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform
)


100%|██████████| 26.4M/26.4M [00:00<00:00, 116MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 4.08MB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 60.7MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 10.7MB/s]


In [4]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [5]:
class FashionNet(nn.Module):
    def __init__(self):
        super(FashionNet, self).__init__()

        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(-1, 784)

        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)

        return x


In [6]:
model = FashionNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [8]:
epochs = 10

for epoch in range(epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for images, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] Loss: {running_loss:.4f}, Accuracy: {accuracy:.2f}%")


Epoch [1/10] Loss: 206.4493, Accuracy: 91.74%
Epoch [2/10] Loss: 196.2805, Accuracy: 91.99%
Epoch [3/10] Loss: 189.1189, Accuracy: 92.33%
Epoch [4/10] Loss: 178.6643, Accuracy: 92.77%
Epoch [5/10] Loss: 174.5890, Accuracy: 93.01%
Epoch [6/10] Loss: 166.8051, Accuracy: 93.31%
Epoch [7/10] Loss: 160.6346, Accuracy: 93.42%
Epoch [8/10] Loss: 154.4364, Accuracy: 93.70%
Epoch [9/10] Loss: 150.0976, Accuracy: 93.88%
Epoch [10/10] Loss: 142.5705, Accuracy: 94.22%


In [9]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")


Test Accuracy: 88.86%


Optimised Version:


In [10]:
class OptimizedFashionNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = x.view(-1, 784)
        return self.model(x)


(Change	-             Effect)
(Fewer layers	 -     Less overfitting)
(Dropout	        -     Reduces variance)
(Smaller network	   -   Better bias-variance balance)

In [11]:
model = OptimizedFashionNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [12]:
epochs = 10

for epoch in range(epochs):
    model.train()  # training mode (important for Dropout)

    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:

        # Reset gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Compute loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Track loss
        running_loss += loss.item()

        # Track accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    avg_loss = running_loss / len(train_loader)

    print(f"Epoch [{epoch+1}/{epochs}] "
          f"Loss: {avg_loss:.4f} "
          f"Train Accuracy: {train_accuracy:.2f}%")


Epoch [1/10] Loss: 0.5692 Train Accuracy: 79.28%
Epoch [2/10] Loss: 0.4325 Train Accuracy: 84.35%
Epoch [3/10] Loss: 0.3999 Train Accuracy: 85.55%
Epoch [4/10] Loss: 0.3766 Train Accuracy: 86.33%
Epoch [5/10] Loss: 0.3628 Train Accuracy: 86.81%
Epoch [6/10] Loss: 0.3494 Train Accuracy: 87.15%
Epoch [7/10] Loss: 0.3379 Train Accuracy: 87.60%
Epoch [8/10] Loss: 0.3313 Train Accuracy: 87.72%
Epoch [9/10] Loss: 0.3251 Train Accuracy: 87.97%
Epoch [10/10] Loss: 0.3159 Train Accuracy: 88.51%


In [13]:
model.eval()  # evaluation mode (Dropout OFF)

correct = 0
total = 0

with torch.no_grad():  # no gradient computation
    for images, labels in test_loader:

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")


Test Accuracy: 88.02%


In this experiment, I built and trained a deep neural network to classify Fashion MNIST images using PyTorch. First, a larger model was trained to learn image features, and then the model was optimized by reducing layers and adding regularization to improve generalization. This showed that optimization helps make the model more stable and perfo
rm better on new, unseen data.