In [1]:
# Imports
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from torch.utils.data import (
    DataLoader,
) 
from tqdm import tqdm

In [2]:

# Simple CNN
class CNN(nn.Module):
    """Small two-convolution image classifier.

    Architecture: conv(3x3, 8 ch) -> ReLU -> max-pool(2x2) ->
    conv(3x3, 16 ch) -> ReLU -> max-pool(2x2) -> flatten -> linear.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits produced by the final layer.
        input_size: Spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair. The default of
            ``(28, 28)`` reproduces the original ``16 * 7 * 7`` classifier
            input size.

    Raises:
        ValueError: If ``input_size`` is too small to survive the two
            pooling stages (either side shrinks to zero).
    """

    def __init__(self, in_channels=1, num_classes=10, input_size=(28, 28)):
        super().__init__()

        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        # The 3x3 / stride 1 / padding 1 convolutions keep height and width
        # unchanged; each 2x2 / stride 2 max-pool halves them (rounding down).
        pooled_height = (height // 2) // 2
        pooled_width = (width // 2) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small: two 2x2 poolings "
                "need at least 4 pixels along each spatial dimension"
            )

        self.conv1 = nn.Conv2d(
            in_channels=in_channels, out_channels=8, kernel_size=3, stride=1, padding=1
        )
        # A single pooling module is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1
        )
        self.fc1 = nn.Linear(16 * pooled_height * pooled_width, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        ``x`` is a batch of images laid out as
        ``(batch, in_channels, height, width)`` whose spatial size matches
        the ``input_size`` the model was built with.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = torch.flatten(x, start_dim=1)
        return self.fc1(x)


In [3]:

# --- Experiment configuration -------------------------------------------
# Model / training constants shared by every run of the sweep.
in_channels = 1
num_classes = 10
num_epochs = 5

# Hyperparameter grid: every batch size is paired with every learning rate.
batch_sizes = [16, 64, 128]
learning_rates = [0.1, 0.01, 0.001]

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# MNIST training split, downloaded into dataset/ on first use and converted
# to tensors on access.
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



In [5]:

# Hyperparameter sweep: train a fresh CNN for every (batch size, learning
# rate) pair and keep per-epoch training metrics so the configurations can be
# compared once the sweep has finished.
criterion = nn.CrossEntropyLoss()  # holds no per-run state, so build it once

# (batch_size, learning_rate) -> list of {"epoch", "loss", "acc"} dicts,
# one entry per completed epoch.
sweep_results = {}

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        # Initialize network
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        model.train()
        train_loader = DataLoader(
            dataset=train_dataset, batch_size=batch_size, shuffle=True
        )
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)

        epoch_history = []
        sweep_results[(batch_size, learning_rate)] = epoch_history

        for epoch in range(num_epochs):
            loss_sum = 0.0  # sum of per-sample losses seen this epoch
            num_correct = 0
            num_seen = 0

            loop = tqdm(train_loader)
            # The description only changes per epoch, so set it once here
            # rather than on every batch.
            loop.set_description(f"Batch Size {batch_size}, LR {learning_rate}, Epoch [{epoch+1}/{num_epochs}] ")

            for data, targets in loop:
                data = data.to(device=device)
                targets = targets.to(device=device)

                scores = model(data)
                loss = criterion(scores, targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Pull scalars off the device once per step.
                batch_count = data.shape[0]
                batch_loss = loss.item()
                batch_correct = (scores.argmax(dim=1) == targets).sum().item()

                # The criterion returns the batch mean (default reduction), so
                # weight it by the batch size to keep a smaller final batch
                # from skewing the epoch average.
                loss_sum += batch_loss * batch_count
                num_correct += batch_correct
                num_seen += batch_count

                loop.set_postfix(loss=batch_loss, acc=batch_correct / batch_count)

            if num_seen:  # skip the summary if the loader yielded nothing
                epoch_history.append(
                    {
                        "epoch": epoch + 1,
                        "loss": loss_sum / num_seen,
                        "acc": num_correct / num_seen,
                    }
                )

Batch Size 16, RL 0.1, Epoch [0/5] : 100%|██████████| 3750/3750 [00:43<00:00, 86.10it/s, acc=0.688, loss=0.733]
Batch Size 16, RL 0.1, Epoch [1/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.82it/s, acc=0.375, loss=1.62]
Batch Size 16, RL 0.1, Epoch [2/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.77it/s, acc=0.938, loss=0.193]
Batch Size 16, RL 0.1, Epoch [3/5] : 100%|██████████| 3750/3750 [00:43<00:00, 86.05it/s, acc=0.812, loss=0.487]
Batch Size 16, RL 0.1, Epoch [4/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.60it/s, acc=0.938, loss=0.416]
Batch Size 16, RL 0.01, Epoch [0/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.98it/s, acc=0.875, loss=0.233]
Batch Size 16, RL 0.01, Epoch [1/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.90it/s, acc=0.938, loss=0.0836]
Batch Size 16, RL 0.01, Epoch [2/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.79it/s, acc=1, loss=0.00628]
Batch Size 16, RL 0.01, Epoch [3/5] : 100%|██████████| 3750/3750 [00:43<00:00, 85.24it/s, acc=1, loss=0