In [1]:
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

tf = ToTensor()
train_dataset = MNIST(root='./data', train=True, transform=tf, download=True)
test_dataset = MNIST(root='./data', train=False, transform=tf)


In [2]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [146]:
from torch import nn

class ConvolutionNetwork(nn.Module):
  '''Network with convolutional layers'''
  def __init__(self):
    super().__init__()
    self.convolution_stack = nn.Sequential(
      # convolutional part
      nn.Conv2d(1, 10, kernel_size=3),
      nn.ReLU(),
      nn.Conv2d(10, 20, kernel_size=3),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2)
    )
    self.dense_stack = nn.Sequential(
      # dense part
      nn.Linear(2880, 128),
      nn.ReLU(),
      nn.Dropout(0.1),
      nn.Linear(128, 10),
      nn.Softmax(dim=1)
    )
    self.flatten = nn.Flatten()
  
  def forward(self, x):
    x = self.convolution_stack(x)
    x = self.flatten(x)
    x = self.dense_stack(x)
    return x


In [5]:
from torch import nn

class CNN(nn.Module):
  '''Network created with some help'''
  def __init__(self):
    super().__init__()
    self.convolution_stack = nn.Sequential(
      nn.Conv2d(1, 32, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.Conv2d(32, 64, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2),
      nn.Dropout2d(0.25)
    )
    self.flatten = nn.Flatten()
    self.dense_stack = nn.Sequential (
      nn.Linear(64 * 14 * 14, 128),
      nn.ReLU(),
      nn.Dropout(0.5),
      nn.Linear(128, 10),
      nn.Softmax(dim=1)
    )

  def forward(self, x):
    x = self.convolution_stack(x)
    x = self.flatten(x)
    x = self.dense_stack(x)
    return x


In [6]:
import torch

device = (
  'cuda' if torch.cuda.is_available() else
  'mps' if torch.backends.mps.is_available() else 
  'cpu'
)


In [140]:
device = 'cpu'

In [148]:
model = ConvolutionNetwork().to(device)

In [7]:
model = CNN().to(device)

In [8]:
from torch import optim
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
# get first batch
images, _ = next(iter(train_loader))
images = images.to(device)
prediction = model(images)
prediction

  nonzero_finite_vals = torch.masked_select(


tensor([[0.0900, 0.1045, 0.0978, 0.1035, 0.0969, 0.0991, 0.0927, 0.0930, 0.1103,
         0.1123],
        [0.0955, 0.1037, 0.1006, 0.1012, 0.0926, 0.0945, 0.1012, 0.0986, 0.1106,
         0.1015],
        [0.0957, 0.1067, 0.0971, 0.0982, 0.1012, 0.1013, 0.0926, 0.0935, 0.1128,
         0.1009],
        [0.0948, 0.1092, 0.1023, 0.1008, 0.0872, 0.0906, 0.0948, 0.1025, 0.1122,
         0.1056],
        [0.0943, 0.1024, 0.0905, 0.1041, 0.0998, 0.0966, 0.0971, 0.0963, 0.1173,
         0.1014],
        [0.1003, 0.1042, 0.0975, 0.0996, 0.0957, 0.0952, 0.0958, 0.0970, 0.1065,
         0.1081],
        [0.0951, 0.1050, 0.0958, 0.1053, 0.0970, 0.0957, 0.0925, 0.1003, 0.1103,
         0.1030],
        [0.0954, 0.1096, 0.0980, 0.1014, 0.0955, 0.1001, 0.0971, 0.0981, 0.1071,
         0.0976],
        [0.0912, 0.1052, 0.1000, 0.1046, 0.0958, 0.0983, 0.0934, 0.0970, 0.1082,
         0.1062],
        [0.0944, 0.0981, 0.0926, 0.1101, 0.1006, 0.0933, 0.0909, 0.0965, 0.1202,
         0.1033],
        [0

In [10]:
epochs = 20
for current_epoch in range(epochs):
  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    predictions = model(images)
    loss = loss_function(predictions, labels)
    loss.backward()
    optimizer.step()
  print(f'Epoch {current_epoch}: last loss = {loss.item()}')

Epoch 0: last loss = 1.4687466621398926
Epoch 1: last loss = 1.463024616241455
Epoch 2: last loss = 1.4873768091201782
Epoch 3: last loss = 1.5252296924591064
Epoch 4: last loss = 1.4612271785736084
Epoch 5: last loss = 1.515791893005371
Epoch 6: last loss = 1.500974416732788
Epoch 7: last loss = 1.467198371887207
Epoch 8: last loss = 1.4959962368011475
Epoch 9: last loss = 1.4614734649658203
Epoch 10: last loss = 1.4631645679473877
Epoch 11: last loss = 1.4918979406356812
Epoch 12: last loss = 1.4612102508544922
Epoch 13: last loss = 1.4611504077911377
Epoch 14: last loss = 1.4935476779937744
Epoch 15: last loss = 1.4611501693725586
Epoch 16: last loss = 1.4611501693725586
Epoch 17: last loss = 1.461150050163269
Epoch 18: last loss = 1.4935702085494995
Epoch 19: last loss = 1.4611506462097168


In [11]:
correct, total = 0, 0
model.eval()
with torch.no_grad():
  for images, labels in test_loader:
    # print(images.shape)
    images, labels = images.to(device), labels.to(device)
    predictions = model(images)
    # print(predictions.shape)
    _, predicted = torch.max(predictions.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
accuracy = correct / total
print(f'Accuracy: {accuracy: .2%}')

Accuracy:  98.95%


In [12]:
# save model
postfix = str(round(accuracy * 1e4))
path = f'./models/model_{postfix}.pth'
torch.save(model, path)
state = model.state_dict()
torch.save(state, f'{path}.state')
