In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt 
import numpy as np 
from sklearn.metrics import confusion_matrix
import torch.nn.functional as F
#import seaborn as sns

In [2]:
# Shared settings for the MNIST datasets and loaders below.
MNIST_ROOT = './data'
BATCH_SIZE = 64

# Preprocessing: convert each image to a tensor, then normalize the single
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits are stored under MNIST_ROOT (download=True fetches them if absent).
train_dataset = torchvision.datasets.MNIST(
    root=MNIST_ROOT,
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root=MNIST_ROOT,
    train=False,
    download=True,
    transform=transform,
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [3]:
class CNN(nn.Module):
    """Small two-convolution, three-linear-layer classifier producing 10 logits.

    The first linear layer takes 16*4*4 features, which matches a
    single-channel 28x28 input: each 5x5 convolution (no padding) trims
    4 pixels per side length and each 2x2 max-pool halves it
    (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

        # Parameter-free layers reused at several points of the forward pass.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return raw class scores of shape (N, 10); no softmax is applied."""
        # (N, 1, 28, 28) -> conv 5x5 -> (N, 6, 24, 24) -> pool 2x2 -> (N, 6, 12, 12)
        features = F.max_pool2d(self.relu(self.conv1(x)), kernel_size=2)
        # (N, 6, 12, 12) -> conv 5x5 -> (N, 16, 8, 8) -> pool 2x2 -> (N, 16, 4, 4)
        features = F.max_pool2d(self.relu(self.conv2(features)), kernel_size=2)

        # Keep the batch dimension, collapse the rest: (N, 16, 4, 4) -> (N, 256)
        flat = features.flatten(start_dim=1)

        # (N, 256) -> (N, 120) -> (N, 84), each followed by ReLU and dropout.
        hidden = self.dropout(self.relu(self.fc1(flat)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))

        # (N, 84) -> (N, 10)
        return self.fc3(hidden)

# cnn_model = CNN()
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001)
        

In [4]:
class SimpleNN(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10 with ReLU and dropout.

    The input is reshaped to rows of 28*28 values before the first linear
    layer, so any tensor whose element count is a multiple of 784 is accepted.
    """

    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Return raw class scores of shape (rows, 10); no softmax is applied."""
        # reshape (not view): gives the same result as view for contiguous
        # input, but also works for non-contiguous tensors (e.g. transposed
        # or sliced batches), where view raises a RuntimeError.
        x = x.reshape(-1, 28 * 28)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [5]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=5, device=None):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device each batch is moved to. When ``None`` (default), the
            device of the model's first parameter is used, so the function
            does not rely on a module-level ``device`` variable. If the model
            has no parameters, batches are left where they are.

    Returns:
        ``(train_losses, train_accuracies)``: two lists with one entry per
        epoch. The loss is the mean of the per-batch loss values; the accuracy
        is the percentage of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If an epoch yields no samples (empty ``train_loader``).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    model.train()
    train_losses = []
    train_accuracies = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        # Counted while iterating so loaders without __len__ also work.
        num_batches = 0

        for images, labels in train_loader:
            if device is not None:
                images, labels = images.to(device), labels.to(device)

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Track loss and accuracy; detach so the metric computation is
            # not recorded by autograd.
            running_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if num_batches == 0 or total == 0:
            raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

        epoch_loss = running_loss / num_batches
        epoch_acc = 100 * correct / total
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

    return train_losses, train_accuracies

In [6]:
# Seed PyTorch's global RNG before any model is built so that weight
# initialization, dropout masks and the training loader's shuffle order are
# repeatable across "Restart & Run All". Some GPU kernels may still be
# nondeterministic.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CNN model
cnn_model = CNN().to(device)
# The same loss object is shared by both models.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Simple NN model
simple_nn_model = SimpleNN().to(device)
optimizer_nn = optim.Adam(simple_nn_model.parameters(), lr=0.001)

# Train CNN
print("Training CNN...")
cnn_losses, cnn_accuracies = train_model(cnn_model, train_loader, criterion, optimizer, num_epochs=5)

# Train Simple NN
print("\nTraining Simple NN...")
nn_losses, nn_accuracies = train_model(simple_nn_model, train_loader, criterion, optimizer_nn, num_epochs=5)

Training CNN...
Epoch [1/5], Loss: 0.3082, Accuracy: 90.37%
Epoch [2/5], Loss: 0.0941, Accuracy: 97.27%
Epoch [3/5], Loss: 0.0679, Accuracy: 97.95%
Epoch [4/5], Loss: 0.0536, Accuracy: 98.46%
Epoch [5/5], Loss: 0.0464, Accuracy: 98.64%

Training Simple NN...
Epoch [1/5], Loss: 0.5623, Accuracy: 82.40%
Epoch [2/5], Loss: 0.3058, Accuracy: 90.89%
Epoch [3/5], Loss: 0.2569, Accuracy: 92.27%
Epoch [4/5], Loss: 0.2282, Accuracy: 93.14%
Epoch [5/5], Loss: 0.2159, Accuracy: 93.42%
