## Computer Vision Assignment
### Instructions
In this assignment, you will apply the computer vision concepts covered in the lesson to perform image classification using the Fashion MNIST dataset. The Fashion MNIST dataset consists of 60,000 28x28 grayscale images of 10 fashion categories.

### Task: Build an Image Classifier
1. Use the provided starter code to load and explore the Fashion MNIST dataset
2. Preprocess the images using appropriate techniques (e.g., normalization, data augmentation)
3. Build a CNN model to classify the images into one of the 10 classes
4. Train your model and evaluate its performance
5. Experiment with at least one technique to improve model performance (e.g., batch normalization, different pooling strategies, additional convolutional layers)
6. Visualize and analyze your results

In [2]:
import io
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib
matplotlib.use('Agg')  # avoid GUI backend to prevent kernel crash
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from IPython.display import display, Image


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Define transformations (with augmentation: rotation and optional horizontal flip)
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [4]:
# Load the Fashion MNIST dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# Classes in Fashion MNIST
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')

In [5]:
# Display after load: dataset sizes and sample training images
print(f'Training set size: {len(train_dataset)}')
print(f'Test set size: {len(test_dataset)}')

Training set size: 60000
Test set size: 10000


In [6]:
# CNN model for Fashion MNIST (28x28 grayscale, 10 classes)
# Improvement: Batch normalization after each conv layer (from lesson)
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        print(f'Epoch {epoch + 1}, Loss: {avg_loss:.4f}')
    print('Training finished')

def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100.0 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy:.2f}%')
    return accuracy

In [None]:
# Train the model (shows loss each epoch)
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

# Evaluate and show final test accuracy
evaluate_model(model, test_loader)

Epoch 1, Loss: 0.4423
Epoch 2, Loss: 0.3110
Epoch 3, Loss: 0.2747
Epoch 4, Loss: 0.2473
Epoch 5, Loss: 0.2308
Epoch 6, Loss: 0.2136
Epoch 7, Loss: 0.2033
Epoch 8, Loss: 0.1921
Epoch 9, Loss: 0.1812
Epoch 10, Loss: 0.1718
Training finished
Accuracy of the network on the test images: 91.23%


91.23

: 

In [None]:
# Visualization: sample test images with predicted vs actual labels
def imshow(img, ax):
    img = img * 0.5 + 0.5  # unnormalize
    npimg = img.detach().cpu().numpy()
    ax.imshow(np.squeeze(npimg), cmap='gray')

dataiter = iter(test_loader)
images, labels = next(dataiter)
outputs = model(images)
_, predicted = torch.max(outputs, 1)

print('Predicted:', ' '.join(f'{classes[predicted[j]]:12s}' for j in range(4)))

fig = plt.figure(figsize=(8, 8))
for idx in np.arange(9):
    ax = fig.add_subplot(3, 3, idx + 1, xticks=[], yticks=[])
    imshow(images[idx], ax)
    ax.set_title(f'Predicted: {classes[predicted[idx]]}\nActual: {classes[labels[idx]]}', fontsize=8)
plt.tight_layout()
buf = io.BytesIO()
fig.savefig(buf, format='png', bbox_inches='tight')
buf.seek(0)
display(Image(data=buf.getvalue()))
plt.close(fig)

Predicted: Ankle boot   Pullover     Trouser      Trouser     
