In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Hyperparameters and run configuration
num_epochs = 5          # full passes over the training set
batch_size = 64         # samples per mini-batch, used by both data loaders
learning_rate = 0.001   # step size handed to the Adam optimizer
seed = 42               # fixed RNG seed so repeated runs are comparable

# Seed PyTorch's global RNG before any model or DataLoader is created.
# Weight initialisation and DataLoader shuffling both draw from it, so
# without this every "Restart & Run All" reports different losses/accuracy.
torch.manual_seed(seed)

# Define the CNN architecture
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Each stage is a 3x3 convolution (stride 1, no padding), a ReLU and a
    2x2 max-pool with stride 2. The flattened feature map is fed to a single
    linear layer that emits one logit per class.

    Args:
        in_channels: Number of channels in the input images. The default of
            1 matches the original single-channel network.
        num_classes: Number of output logits. Defaults to 10.
        image_size: Height and width of the (square) input images. The
            default of 28 yields the original ``32*5*5`` input to ``fc1``.

    Raises:
        ValueError: If ``image_size`` is too small to leave at least one
            pixel after both convolution/pooling stages.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10, image_size: int = 28):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, stride=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Derive the flattened feature size instead of hard-coding it:
        # an unpadded 3x3 conv trims 2 pixels per side length, and the
        # 2x2/stride-2 pool floor-halves it (28 -> 26 -> 13 -> 11 -> 5).
        side = image_size
        for _ in range(2):
            side = (side - 2) // 2
        if side < 1:
            raise ValueError(
                "image_size={} is too small for two conv/pool stages".format(image_size)
            )
        self.fc1 = nn.Linear(in_features=32 * side * side, out_features=num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, in_channels, image_size, image_size).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        # Keep the batch dimension and flatten everything else for fc1.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        return x

# MNIST train and test splits, stored under ./data (fetched if absent);
# every image is passed through ToTensor before it reaches the model.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training batches are reshuffled each epoch,
# test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Network, cross-entropy loss on its logits, and Adam over all its parameters
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training: num_epochs passes over train_loader, one optimizer update per mini-batch
steps_per_epoch = len(train_loader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Forward pass and loss for this mini-batch
        loss = criterion(model(images), labels)

        # Clear gradients left over from the previous step, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps
        if step % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                epoch + 1, num_epochs, step, steps_per_epoch, loss.item()))

# Test the model
# Put the network in evaluation mode before measuring accuracy. The CNN above
# has no dropout or batch-norm layers, so predictions are unchanged today, but
# this keeps the measurement correct if such layers are added later.
# Note: the model stays in eval mode afterwards; call model.train() before
# any further training.
model.eval()

# Gradients are not needed for inference; no_grad skips building the graph.
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        # (.data is unnecessary here because autograd is already disabled.)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Epoch [1/5], Step [100/938], Loss: 0.4462
Epoch [1/5], Step [200/938], Loss: 0.1914
Epoch [1/5], Step [300/938], Loss: 0.2631
Epoch [1/5], Step [400/938], Loss: 0.0913
Epoch [1/5], Step [500/938], Loss: 0.0767
Epoch [1/5], Step [600/938], Loss: 0.0673
Epoch [1/5], Step [700/938], Loss: 0.1048
Epoch [1/5], Step [800/938], Loss: 0.0466
Epoch [1/5], Step [900/938], Loss: 0.1056
Epoch [2/5], Step [100/938], Loss: 0.1968
Epoch [2/5], Step [200/938], Loss: 0.0324
Epoch [2/5], Step [300/938], Loss: 0.0884
Epoch [2/5], Step [400/938], Loss: 0.0551
Epoch [2/5], Step [500/938], Loss: 0.0338
Epoch [2/5], Step [600/938], Loss: 0.0134
Epoch [2/5], Step [700/938], Loss: 0.0850
Epoch [2/5], Step [800/938], Loss: 0.0265
Epoch [2/5], Step [900/938], Loss: 0.1332
Epoch [3/5], Step [100/938], Loss: 0.0508
Epoch [3/5], Step [200/938], Loss: 0.0174
Epoch [3/5], Step [300/938], Loss: 0.0080
Epoch [3/5], Step [400/938], Loss: 0.0406
Epoch [3/5], Step [500/938], Loss: 0.0224
Epoch [3/5], Step [600/938], Loss: