##### Loading the MNIST dataset

In [90]:
import torch
import torchvision
from torch import nn
from torch.nn.functional import relu, max_pool2d, log_softmax
from torchvision import datasets, transforms
from torchinfo import summary
from torch import optim
from torch.optim.lr_scheduler import StepLR

In [91]:
batch_size_train = 256
batch_size_test = 1000
lr = 1e-3
gamma = 0.7

device = torch.accelerator.current_accelerator()
device

device(type='mps')

In [92]:
# Convert each image to a float tensor, then standardise it with a single-channel
# mean / std pair (0.1307 / 0.3081 are the values commonly quoted for the MNIST
# training pixels).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# dataset1 = training split (downloaded into ./data on first use),
# dataset2 = held-out test split read from the same directory.
dataset1 = datasets.MNIST("data", train=True, download=True, transform=transform)
dataset2 = datasets.MNIST("data", train=False, transform=transform)

# Shuffle only the training data. Evaluation metrics do not depend on sample
# order, so the test loader keeps a fixed order and its batches are reproducible.
train_loader = torch.utils.data.DataLoader(dataset1, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset2, batch_size=batch_size_test, shuffle=False)

# Sanity check on the raw (untransformed) tensors stored by the datasets.
print(dataset1.data.shape)
print(dataset2.targets.shape)

# torch.Size([60000, 28, 28])
# torch.Size([10000])

torch.Size([60000, 28, 28])
torch.Size([10000])


In [93]:
class NeuralNetworkV1(nn.Module):
    """Small convolutional classifier producing 10 log-probabilities per sample.

    Layout: two 3x3 convolutions (1 -> 32 -> 64 channels, stride 1, no padding),
    a 2x2 max-pool, dropout, then two fully connected layers (9216 -> 128 -> 10).
    The 9216 input features of ``fc1`` equal 64 * 12 * 12, i.e. what the
    convolution/pooling stack yields for a 28x28 single-channel image.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
        # Regularisation: lighter after pooling, heavier before the output layer.
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, X):
        """Return log-softmax scores (over dim 1) for the batch ``X``."""
        # conv -> ReLU -> conv -> ReLU -> 2x2 max-pool
        features = max_pool2d(relu(self.conv2(relu(self.conv1(X)))), 2)
        # Drop, then collapse everything except the batch dimension.
        features = self.dropout1(features).flatten(start_dim=1)
        hidden = self.dropout2(relu(self.fc1(features)))
        return log_softmax(self.fc2(hidden), dim=1)


In [94]:
def train(model_train: nn.Module, loader, optimize, i: int, log_interval: int = 5) -> None:
    """Run one optimisation pass over ``loader`` and print periodic progress.

    Args:
        model_train: Network to optimise; it is switched to training mode. Its
            output is fed to ``nn.NLLLoss``, so it should emit log-probabilities.
        loader: Iterable of ``(data, target)`` batches. ``len(loader)`` and
            ``len(loader.dataset)`` are read when a progress line is printed.
        optimize: Optimiser bound to ``model_train``'s parameters.
        i: Epoch number, used only in the progress message.
        log_interval: Print accuracy and loss of the current batch every this
            many batches. A value <= 0 disables printing.
    """
    model_train.train()

    # Build the loss once instead of once per batch.
    criterion = nn.NLLLoss()

    # Send each batch to wherever the model's weights live rather than relying
    # on a notebook-level `device` variable. A model without parameters leaves
    # the batch where it is.
    first_param = next(model_train.parameters(), None)
    batch_device = None if first_param is None else first_param.device

    for batch_idx, (data, target) in enumerate(loader):
        if batch_device is not None:
            data, target = data.to(batch_device), target.to(batch_device)

        optimize.zero_grad()
        output = model_train(data)
        loss = criterion(output, target)
        loss.backward()
        optimize.step()

        if log_interval > 0 and batch_idx % log_interval == 0:
            # Accuracy and loss of this batch, measured on the pre-step forward pass.
            batch_accuracy = (output.argmax(dim=1) == target).float().mean().item()
            print(f"Accuracy: {100 * batch_accuracy:.2f}%")
            print(
                f"Train Epoch: {i} [{batch_idx * len(data)}/{len(loader.dataset)} ({100. * batch_idx / len(loader):.0f}%)]\tLoss: {loss.item():.6f}")

In [95]:
# Instantiate the network and place its weights on the selected device.
model = NeuralNetworkV1()
model = model.to(device)

# Adadelta optimiser whose learning rate is multiplied by `gamma` after every
# epoch (StepLR with a step size of one).
optimizer = optim.Adadelta(params=model.parameters(), lr=lr)
scheduler = StepLR(optimizer=optimizer, step_size=1, gamma=gamma)

# Epochs are numbered from 1 so the progress messages read naturally.
epochs = 20
for epoch in range(1, 1 + epochs):
    train(model_train=model, loader=train_loader, optimize=optimizer, i=epoch)
    scheduler.step()

# Persist only the learned weights, not the whole module object.
torch.save(obj=model.state_dict(), f="mnist_cnn.pt")

Accuracy: 8.20%
Accuracy: 10.55%
Accuracy: 10.55%
Accuracy: 8.98%
Accuracy: 7.81%
Accuracy: 11.33%
Accuracy: 11.33%
Accuracy: 12.50%
Accuracy: 12.89%
Accuracy: 14.06%
Accuracy: 11.33%
Accuracy: 14.84%
Accuracy: 12.89%
Accuracy: 17.97%
Accuracy: 14.45%
Accuracy: 15.23%
Accuracy: 17.97%
Accuracy: 19.14%
Accuracy: 23.05%
Accuracy: 21.09%
Accuracy: 20.70%
Accuracy: 23.83%
Accuracy: 23.44%
Accuracy: 23.05%
Accuracy: 24.22%
Accuracy: 25.00%
Accuracy: 25.78%
Accuracy: 24.22%
Accuracy: 26.56%
Accuracy: 25.39%
Accuracy: 31.25%
Accuracy: 28.12%
Accuracy: 27.34%
Accuracy: 24.61%
Accuracy: 23.05%
Accuracy: 29.30%
Accuracy: 26.95%
Accuracy: 27.73%
Accuracy: 28.52%
Accuracy: 30.86%
Accuracy: 36.33%
Accuracy: 33.59%
Accuracy: 35.16%
Accuracy: 33.98%
Accuracy: 35.55%
Accuracy: 37.50%
Accuracy: 42.97%
Accuracy: 41.02%
Accuracy: 34.77%
Accuracy: 36.72%
Accuracy: 41.41%
Accuracy: 35.16%
Accuracy: 36.72%
Accuracy: 36.33%
Accuracy: 43.75%
Accuracy: 41.41%
Accuracy: 41.80%
Accuracy: 35.94%
Accuracy: 36.72%
