In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

In [12]:
# --- Hyperparameters (things you can easily change!) ---
num_epochs = 10
learning_rate = 0.01
weight_decay = 0.001
batch_size = 64
validation_split = 0.2  # Percentage of the training data to use for validation
random_seed = 42      # For making sure our splits are the same each time

In [3]:
# Set the random seed for reproducibility
torch.manual_seed(random_seed)

<torch._C.Generator at 0x1db3c4dee30>

In [4]:
# --- Preparing the Dataset and DataLoaders ---

# Define the transformations to apply to the images
# Here, we convert the images to PyTorch tensors and normalize the pixel values
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Download the MNIST dataset
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)

# Split the training dataset into training and validation sets
train_size = int((1 - validation_split) * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Create DataLoaders. These help us load the data in batches during training.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [5]:
# --- Implementing the Simple Classifier ---

class SimpleClassifier(nn.Module):
    def __init__(self, input_size, output_size):
        super(SimpleClassifier, self).__init__()
        # This is a single linear layer (like a simple connection of all inputs to all outputs)
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        # The input images are 28x28 pixels, so we need to flatten them into a single vector of 784 elements
        x = x.view(-1, 28 * 28)
        # Pass the flattened vector through the linear layer
        x = self.linear(x)
        return x

In [6]:
# Initialize the model
input_size = 28 * 28  # 784 input features (28x28 pixels)
output_size = 10     # 10 output classes (digits 0-9)
model = SimpleClassifier(input_size, output_size)

In [7]:
# --- Training Function ---

def train(model, train_loader, optimizer, epoch):
    model.train()  # Set the model to training mode
    total_loss = 0
    correct = 0
    total = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # Zero the gradients from the previous batch
        optimizer.zero_grad()
        # Forward pass: compute the output predictions
        output = model(data)
        # Calculate the loss (how wrong the predictions are compared to the true labels)
        loss = nn.CrossEntropyLoss()(output, target)
        # Backward pass: compute the gradients of the loss with respect to the model's parameters
        loss.backward()
        # Update the model's parameters based on the gradients
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

        if (batch_idx + 1) % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

    avg_loss = total_loss / len(train_loader)
    accuracy = 100. * correct / total
    print(f'Train Epoch: {epoch} Average Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')
    return avg_loss, accuracy

In [8]:
# --- Evaluating Function ---

def evaluate(model, data_loader):
    model.eval()   # Set the model to evaluation mode (no gradient calculation)
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():  # Disable gradient calculations during evaluation
        for data, target in data_loader:
            output = model(data)
            loss = nn.CrossEntropyLoss()(output, target)
            total_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    avg_loss = total_loss / len(data_loader)
    accuracy = 100. * correct / total
    return avg_loss, accuracy

In [9]:
# --- Testing Function ---

def test(model, test_loader):
    test_loss, test_accuracy = evaluate(model, test_loader)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%')

In [10]:
# --- Initialize the Optimizer ---
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# --- Training and Evaluation Loop ---
print("Starting Training...")
for epoch in range(1, num_epochs + 1):
    train_loss, train_accuracy = train(model, train_loader, optimizer, epoch)
    val_loss, val_accuracy = evaluate(model, val_loader)
    print(f'Validation Epoch: {epoch} Average Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%')

Starting Training...
Train Epoch: 1 Average Loss: 0.5105, Accuracy: 87.24%
Validation Epoch: 1 Average Loss: 0.5379, Accuracy: 87.69%
Train Epoch: 2 Average Loss: 0.5061, Accuracy: 88.51%
Validation Epoch: 2 Average Loss: 0.5505, Accuracy: 88.90%
Train Epoch: 3 Average Loss: 0.4971, Accuracy: 88.70%
Validation Epoch: 3 Average Loss: 0.5177, Accuracy: 88.83%
Train Epoch: 4 Average Loss: 0.4908, Accuracy: 88.93%
Validation Epoch: 4 Average Loss: 0.4988, Accuracy: 89.12%
Train Epoch: 5 Average Loss: 0.5053, Accuracy: 88.58%
Validation Epoch: 5 Average Loss: 0.5637, Accuracy: 88.28%
Train Epoch: 6 Average Loss: 0.5299, Accuracy: 88.55%
Validation Epoch: 6 Average Loss: 0.5402, Accuracy: 88.92%
Train Epoch: 7 Average Loss: 0.5009, Accuracy: 88.88%
Validation Epoch: 7 Average Loss: 0.5408, Accuracy: 88.54%
Train Epoch: 8 Average Loss: 0.5318, Accuracy: 88.53%
Validation Epoch: 8 Average Loss: 0.5352, Accuracy: 88.24%
Train Epoch: 9 Average Loss: 0.5019, Accuracy: 88.80%
Validation Epoch: 9 A

In [13]:
# --- Testing the Model ---
print("\nStarting Testing...")
test(model, test_loader)


Starting Testing...

Test set: Average loss: 0.5573, Accuracy: 88.37%
