In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

## Load and Preprocess Data

In [2]:
# Per-channel mean and std handed to Normalize; MNIST images have a single
# channel, hence the one-element tuples.
norm_mean = (0.5, )
norm_std = (0.5, )

# Pipeline applied to every sample: convert to a tensor first, then normalize
# as (value - mean) / std.
preprocess = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std)]
)

In [3]:
# Arguments shared by both splits: where the files live, download them if they
# are missing, and run every sample through the `preprocess` pipeline.
mnist_kwargs = dict(root='dataset/', download=True, transform=preprocess)

# `train` selects the split: True for the training set, False for the test set.
trainset = datasets.MNIST(train=True, **mnist_kwargs)
testset = datasets.MNIST(train=False, **mnist_kwargs)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


102.8%


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


112.7%
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw

Processing...
Done!


In [4]:
# Training batches are reshuffled every epoch so that successive epochs do not
# see the samples in the same order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Evaluation only counts correct predictions, so sample order is irrelevant;
# keeping the test set unshuffled makes the evaluation pass deterministic.
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

## Building the Neural Network

In [5]:
# Number of feature maps produced by every convolution in `Net`.
nf = 32

# Adam optimizer settings: learning rate and the two beta coefficients.
lr = 1e-4
beta1, beta2 = 0.5, 0.999

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [6]:
class Net(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Three 3x3 convolutions (stride 1, padding 1, so the spatial size is kept)
    are interleaved with two 2x2 max-pools that halve the spatial size
    (28 -> 14 -> 7). The resulting feature map is flattened and passed through
    three linear layers that end in 10 output logits.

    Args:
        num_filters: Number of feature maps produced by every convolution.
            When omitted, the notebook-level ``nf`` setting is used, so
            ``Net()`` behaves exactly as before.
    """

    def __init__(self, num_filters=None):
        super(Net, self).__init__()

        # Resolve the default lazily so the class can also be built with an
        # explicit filter count, independent of the notebook's global `nf`.
        if num_filters is None:
            num_filters = nf

        self.convs = nn.Sequential(
            nn.Conv2d(1, num_filters, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),

            nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28 -> num_filters x 14 x 14

            nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14 -> num_filters x 7 x 7
        )

        # Size of the flattened feature map for a 28x28 input. Deriving it from
        # the filter count (instead of hard-coding 1568 = 32 * 7 * 7) keeps the
        # first linear layer valid when the number of filters changes.
        flat_features = num_filters * 7 * 7

        self.linears = nn.Sequential(
            nn.Linear(flat_features, 100),
            nn.ReLU(),

            nn.Linear(100, 50),
            nn.ReLU(),

            nn.Linear(50, 10),
        )

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Batch of images shaped (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding unnormalized class scores.
        """
        x = self.convs(x)
        # Flatten everything except the batch dimension:
        # (batch, num_filters, 7, 7) -> (batch, num_filters * 49).
        x = x.view(x.size(0), -1)
        x = self.linears(x)
        return x

### Convolution size formula

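$$\text{outputWidth} = \left\lfloor \frac{\text{inputWidth} - \text{filterSize} + 2 \cdot \text{padding}}{\text{stride}} \right\rfloor + 1$$

$$\text{outputHeight} = \left\lfloor \frac{\text{inputHeight} - \text{filterSize} + 2 \cdot \text{padding}}{\text{stride}} \right\rfloor + 1$$

The division is rounded down. For example, a 28-pixel-wide input with a 3x3 filter, padding 1 and stride 1 keeps its width: (28 - 3 + 2) / 1 + 1 = 28.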

### Max pool size formula

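$$\text{outputWidth} = \left\lfloor \frac{\text{inputWidth} - \text{filterSize}}{\text{stride}} \right\rfloor + 1$$

$$\text{outputHeight} = \left\lfloor \frac{\text{inputHeight} - \text{filterSize}}{\text{stride}} \right\rfloor + 1$$

The division is rounded down. For example, a 2x2 max-pool with stride 2 halves a 28-pixel-wide input: (28 - 2) / 2 + 1 = 14, and a second one gives (14 - 2) / 2 + 1 = 7.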


## Write the Training Loop

In [7]:
# Build the network and place its parameters on the selected device.
model = Net()
model = model.to(device)

# Cross-entropy loss over the 10 output logits.
criterion_CE = nn.CrossEntropyLoss()
criterion_CE = criterion_CE.to(device)

# Adam over all model parameters, configured from the hyperparameter cell.
optimizer = optim.Adam(
    model.parameters(),
    lr=lr,
    betas=(beta1, beta2),
)

In [9]:
# Number of full passes over the training set.
epoch = 3
model.train()
for e in range(epoch):
    print(f'Starting epoch {e} of {epoch}')
    for X, y in trainloader:
        # Inputs AND targets must be on the same device as the model; leaving
        # `y` on the CPU makes the loss computation fail when running on CUDA.
        X = X.to(device)
        y = y.to(device)

        # Clear gradients left over from the previous step before computing
        # new ones.
        optimizer.zero_grad()
        predictions = model(X)
        loss = criterion_CE(predictions, y)
        loss.backward()
        optimizer.step()
    # Note: this is the loss of the last mini-batch of the epoch only, not an
    # average over the epoch.
    print(f'Loss: {loss.item()}')

# Persist the trained weights (parameters only, not the whole module object).
torch.save(model.state_dict(), "model.pt")

Starting epoch 0 of 3
Loss: 0.03078802116215229
Starting epoch 1 of 3
Loss: 0.05547785386443138
Starting epoch 2 of 3
Loss: 0.06214237958192825


## Write the Testing Loop

In [10]:
# Switch the model to evaluation mode before measuring accuracy.
model.eval()
correct = 0
# No gradients are needed for evaluation, so disable tracking for the whole loop.
with torch.no_grad():
    for X, y in testloader:
        # Move the labels along with the inputs; comparing CUDA predictions
        # against CPU labels raises a device-mismatch error.
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        # Predicted class = index of the largest logit in each row.
        predictions = output.argmax(dim=1)
        # Accumulate as a plain Python int rather than a tensor.
        correct += (predictions == y).sum().item()

print(f'accuracy: {int(correct)}/{len(testloader.dataset)} ({int(correct)/len(testloader.dataset)} or {int(correct)/len(testloader.dataset) * 100}%)')

accuracy: 9851/10000 (0.9851 or 98.50999999999999%)
