# **Image Classification Model on Fashion MNIST using Convolutional Neural Networks**

## Importing Libraries

In [6]:
import torch
import torch.nn as nn # All NN models
import torch.optim as optim # Loads all optimization models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader # Gives easier dataset management
import torchvision.transforms as transforms # Transformations we can perform on our dataset

## Initial Setup

This includes a check for a GPU (PyTorch CUDA) and defaults to the CPU if one is not available. (My system doesn't have a GPU, hence I am using Colab.)


In [7]:
# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune lives here instead of being repeated
# as magic numbers further down.
SEED = 42            # seed for PyTorch's random number generators
BATCH_SIZE = 64      # images per mini-batch (the network cannot take all 60000 at once)
DATA_ROOT = './data' # directory the dataset is downloaded to / loaded from

# Seed PyTorch so that weight initialisation, dropout masks and the shuffle
# order of the training DataLoader are repeatable across "Restart & Run All".
# This is best-effort: some GPU kernels may still be non-deterministic.
torch.manual_seed(SEED)

# Set device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Defining the data transformations (pre-processing operations applied to each
# image before it is fed to the network):
#   1. ToTensor   - converts the image to a float tensor
#   2. Normalize  - (x - 0.5) / 0.5 on the single grey-scale channel
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))]
)

# Load the Fashion MNIST dataset. torchvision downloads it into DATA_ROOT on
# the first run and reuses the local copy afterwards.

# Training data
train_dataset = datasets.FashionMNIST(
    root=DATA_ROOT, train=True, transform=transform, download=True
)

# Testing data
test_dataset = datasets.FashionMNIST(
    root=DATA_ROOT, train=False, transform=transform, download=True
)

# Create DataLoaders: these serve the data in mini-batches.
# Training batches are reshuffled every epoch; the test order is kept fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Defining the class names (list index == integer label in the dataset)
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

Using device: cuda


100%|██████████| 26.4M/26.4M [00:01<00:00, 14.2MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 209kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.91MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 10.7MB/s]


## **Defining the CNN model**

### The working basics

---



1. ```__init__``` sets up two convolution and pooling blocks (conv1/pool1 & conv2/pool2)

2. The conv1 layer accepts a single channel image (grey scale). ```in_channels=1```

3. The layer will look for 32 different basic patterns and
produce 32 different feature maps ```out_channels=32```

4. pool1 is the first pooling layer; it shrinks (downsamples) the 32 feature maps produced by conv1

5. conv2 is the second convolution layer -
it finds more complex patterns by combining the simpler feature maps produced by conv1 (as explained in my notes)

6. The conv and pool layers are feature extractors (2D),
but the classification works in 1D, so the feature maps are flattened into a single list of numbers before the fully connected layers (see my notes)

7. A dropout layer has been added to prevent the model from overfitting.


8. The forward method defines the flow of data through the network layers.



In [8]:
class CNN(nn.Module):
    """Small two-block convolutional classifier.

    Layout: (Conv 3x3 -> ReLU -> MaxPool 2x2) twice, then
    Linear -> ReLU -> Dropout -> Linear.

    The 3x3 convolutions use stride 1 and padding 1, so they keep the spatial
    size; each 2x2 pooling layer halves it. The first fully connected layer
    is sized for 64 feature maps of 7x7, which corresponds to 28x28 inputs
    (28 -> 14 -> 7).

    Args:
        in_channels: Number of channels in the input images (default 1).
        num_classes: Number of output scores produced per image (default 10).
        dropout_p: Drop probability of the dropout layer that sits between
            the two fully connected layers (default 0.5).
    """

    def __init__(self, in_channels=1, num_classes=10, dropout_p=0.5):
        super().__init__()
        # Convolutional Block 1: in_channels -> 32 feature maps, then halve H and W
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        # Convolutional Block 2: 32 -> 64 feature maps, then halve H and W again
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        # Classifier: flattened 64 x 7 x 7 features -> 128 hidden units -> num_classes scores
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.dropout = nn.Dropout(dropout_p)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: Tensor of shape (batch, in_channels, 28, 28).

        Returns:
            Tensor of shape (batch, num_classes) holding the raw output of the
            last linear layer (no softmax is applied here).
        """
        x = torch.relu(self.conv1(x))
        x = self.pool1(x)
        x = torch.relu(self.conv2(x))
        x = self.pool2(x)
        # Flatten everything except the batch dimension. Unlike
        # reshape(batch, -1), this also works when the batch is empty
        # (reshape cannot infer -1 for a tensor with 0 elements).
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

## Optimization

CrossEntropyLoss and Adam optimizer are used to train the model.

In [9]:
# Build the network, then move its parameters onto the selected device.
model = CNN()
model = model.to(device)

# Loss: cross-entropy computed directly on the scores returned by the model.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all of the model's parameters with a step size of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## Training Loop

One Epoch is one complete pass of the training data through the algorithm

The data is moved to the GPU, and a forward pass is made to get the predictions. The loss is calculated.

loss.backward() calculates the gradients.

optimizer.step() updates the model's weights


After each epoch, the loss of the final batch in that epoch is printed.


In [10]:
num_epochs = 15
print("Starting training...")

# Make sure training-only layers (dropout) are active, even if an earlier
# cell left the model in evaluation mode.
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0               # sum of per-sample losses over this epoch
    samples_seen = 0                 # number of training samples processed this epoch
    last_batch_loss = float("nan")   # stays NaN if the loader yields no batches

    for data, targets in train_loader:
        # Move data to device
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass and optimization
        optimizer.zero_grad()   # clear gradients left over from the previous step
        loss.backward()         # compute gradients of the loss w.r.t. the parameters
        optimizer.step()        # update the parameters

        # `criterion` averages over the batch, so weight by the batch size to
        # get a correct per-sample mean even when the last batch is smaller.
        last_batch_loss = loss.item()
        batch_size = targets.size(0)
        running_loss += last_batch_loss * batch_size
        samples_seen += batch_size

    # The loss of a single batch is noisy; the epoch mean shows the real trend.
    mean_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {last_batch_loss:.4f}, Mean loss: {mean_loss:.4f}")

print("Training finished.")

Starting training...
Epoch [1/15], Loss: 0.5718
Epoch [2/15], Loss: 0.6161
Epoch [3/15], Loss: 0.2944
Epoch [4/15], Loss: 0.0618
Epoch [5/15], Loss: 0.2491
Epoch [6/15], Loss: 0.1921
Epoch [7/15], Loss: 0.1410
Epoch [8/15], Loss: 0.1723
Epoch [9/15], Loss: 0.1864
Epoch [10/15], Loss: 0.1783
Epoch [11/15], Loss: 0.0659
Epoch [12/15], Loss: 0.1451
Epoch [13/15], Loss: 0.1600
Epoch [14/15], Loss: 0.1095
Epoch [15/15], Loss: 0.0683
Training finished.


## Evaluating the model

Here the trained model is evaluated on the test set, i.e. data it has not seen during training.

In [11]:
def check_accuracy(loader, model, split_name="test"):
    """Compute, print and return the classification accuracy of `model`.

    The model is switched to evaluation mode for the duration of the check and
    put back into whatever mode (training / evaluation) it was in before,
    even if an exception is raised while iterating.

    Args:
        loader: Iterable yielding `(inputs, labels)` batches.
        model: `nn.Module` returning one row of class scores per input.
        split_name: Name of the data split used in the printed message
            (default "test", which reproduces the original message).

    Returns:
        Accuracy as a percentage (float in [0, 100]). Returns 0.0 when the
        loader yields no samples, instead of dividing by zero.
    """
    # Run on the device the model lives on rather than relying on a global;
    # fall back to the CPU for models that have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0

    was_training = model.training
    model.eval()  # Set model to evaluation mode (disables dropout)
    try:
        with torch.no_grad():  # no gradients needed for evaluation
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)

                scores = model(x)
                predictions = scores.argmax(dim=1)  # index of the highest score per row
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)  # Restore the mode the caller had set

    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f'Accuracy on the {split_name} set: {accuracy:.2f}%')
    return accuracy

# Evaluate the model on the test loader; the returned accuracy is the cell's output.
check_accuracy(loader=test_loader, model=model)

Accuracy on the test set: 92.03%


92.03