In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define AlexNet architecture
class AlexNet(nn.Module):
    """Scaled-down AlexNet-style CNN for small images.

    The network stacks five convolutional layers and three fully connected
    layers.  The shape comments below assume a 32 x 32 input, for which the
    convolutional stack already ends in a 1 x 1 feature map.  A global
    average pool after the convolutional stack keeps the classifier usable
    for larger inputs as well; on a 1 x 1 map it is an identity operation.

    Args:
        num_classes: Number of logits produced by the final linear layer.
        in_channels: Number of channels of the input images
            (1 for grayscale, 3 for RGB).
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.conv_layers = nn.Sequential(
            # Conv Layer 1: floor((32 + 2*2 - 11) / 4) + 1 = 7
            nn.Conv2d(in_channels, 64, kernel_size=11, stride=4, padding=2),  # Output: 64 x 7 x 7
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # Output: 64 x 4 x 4
            # Conv Layer 2
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2), # Output: 192 x 4 x 4
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # Output: 192 x 2 x 2
            # Conv Layer 3
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),# Output: 384 x 2 x 2
            nn.ReLU(inplace=True),
            # Conv Layer 4
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),# Output: 256 x 2 x 2
            nn.ReLU(inplace=True),
            # Conv Layer 5
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),# Output: 256 x 2 x 2
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                  # Output: 256 x 1 x 1
        )
        # Collapse whatever spatial extent is left to 1 x 1 so that the first
        # linear layer always receives exactly 256 features.  Parameter-free,
        # so state_dict keys are unaffected.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 4096),                                   # Fully Connected Layer 1
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),                                  # Fully Connected Layer 2
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),                           # Output Layer
        )

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised scores.
        """
        x = self.conv_layers(x)
        x = self.global_pool(x)
        x = self.fc_layers(x)
        return x

# Training configuration
batch_size = 64
learning_rate = 1e-3
num_epochs = 5

# Input pipeline: upscale the MNIST digits to the 32 x 32 size the network
# is fed with, convert them to tensors, then normalise the single grayscale
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST train / test splits (downloaded into ./data if not present)
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Build the network together with its loss function and optimizer
model = AlexNet(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Train for a fixed number of passes over the training set
for epoch in range(num_epochs):
    model.train(True)  # training mode: dropout layers are active
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Forward pass and loss for the current mini-batch
        loss = criterion(model(inputs), labels)

        # Discard gradients from the previous step, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch mean loss for the epoch summary
        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

# Measure top-1 accuracy on the test split
model.eval()  # evaluation mode: dropout layers are disabled
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for inference
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Index of the largest logit in each row is the predicted class
        predicted = outputs.max(dim=1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Epoch [1/5], Loss: 0.5078
Epoch [2/5], Loss: 0.1331
Epoch [3/5], Loss: 0.1204
Epoch [4/5], Loss: 0.1044
Epoch [5/5], Loss: 0.0809
Test Accuracy: 98.06%
