In [1]:
%load_ext pycodestyle_magic
%flake8_on

In [2]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torchvision.datasets import MNIST
import torch.nn.functional as F

In [3]:
# Load the MNIST train / test splits (download=True fetches them if needed).
trainset = MNIST('../', download=True, train=True)
testset = MNIST('../', download=True, train=False)

# Keep the labels before the names below are rebound to image tensors.
y_trainset = trainset.targets
y_testset = testset.targets

# this time, we resize the data to have directly one channel for convolutions
# (-1 infers the number of images instead of hard-coding 60000 / 10000)
trainset = trainset.data.reshape(-1, 1, 28, 28).to(torch.float32)
testset = testset.data.reshape(-1, 1, 28, 28).to(torch.float32)

# normalize both splits with the statistics of the training images only
m, s = trainset.mean(), trainset.std()
trainset = (trainset - m) / s
testset = (testset - m) / s

# Import (not define) useful classes
So far we've been defining our own Dataset, DataLoader and Optimizer classes. That was mainly an exercise: recreating them from scratch gave us a deep understanding of exactly what they do. From now on we can simply import them from PyTorch.

In [4]:
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import random_split

from torch.optim import SGD

In [5]:
def accuracy(output, target):
    """Return the fraction of rows whose highest-scoring class equals target.

    Args:
        output: Tensor of per-class scores; the arg-max is taken along dim 1.
        target: Tensor of expected class indices, compared element-wise.

    Returns:
        A scalar float tensor holding the mean of the element-wise matches.
    """
    predicted = output.argmax(dim=1)
    hits = predicted.eq(target)
    return hits.float().mean()

In [6]:
# Still, we've got to create our own Dataset Class inheriting from Dataset
class MNIST_Dataset(Dataset):
    """Map-style dataset pairing each sample in ``x_tensor`` with a target.

    Args:
        x_tensor: Indexable, sized container of samples.
        y_tensor: Indexable, sized container of targets, one per sample.

    Raises:
        ValueError: If the two containers do not have the same length.
    """

    def __init__(self, x_tensor, y_tensor):
        # Fail fast: a length mismatch would otherwise only show up as an
        # IndexError mid-epoch, or silently leave surplus targets unused.
        if len(x_tensor) != len(y_tensor):
            raise ValueError(
                "x_tensor and y_tensor must have the same length, "
                f"got {len(x_tensor)} and {len(y_tensor)}"
            )
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, idx):
        """Return the ``(sample, target)`` pair stored at ``idx``."""
        return (self.x[idx], self.y[idx])

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

In [7]:
# The first 50,000 samples are used for training, the remainder for
# validation; images and labels are sliced at the same position.
x_train, y_train = trainset[:50000], y_trainset[:50000]
x_valid, y_valid = trainset[50000:], y_trainset[50000:]

# Wrap each split in the Dataset class defined above.
train = MNIST_Dataset(x_train, y_train)
valid = MNIST_Dataset(x_valid, y_valid)

In [8]:
# Training hyper-parameters
EPOCHS = 5
bs = 64  # mini-batch size used by both loaders
lr = 0.05  # learning rate handed to the optimizer
loss_func = F.cross_entropy


# Only the training loader shuffles; validation order stays fixed.
train_dl = DataLoader(train, batch_size=bs, shuffle=True)
valid_dl = DataLoader(valid, batch_size=bs, shuffle=False)

In [9]:
# Define model


class Lambda(nn.Module):
    """Adapter that lets a plain callable sit inside an ``nn`` container."""

    def __init__(self, func):
        """Store ``func``, the callable applied on every forward pass."""
        super().__init__()
        self.func = func

    def forward(self, x):
        """Apply the wrapped callable to ``x`` and return its result."""
        result = self.func(x)
        return result


def flatten(x):
    """Collapse every dimension after the first into a single one.

    Args:
        x: Tensor whose first dimension is kept as is.

    Returns:
        A tensor of shape ``(x.shape[0], -1)``.
    """
    # reshape (unlike view) also accepts non-contiguous input, e.g. the
    # result of a transpose/permute, copying only when it has to.
    return x.reshape(x.shape[0], -1)


# Five stride-2 convolutions shrink the 28x28 input down to 1x1; the pooled
# 64 features are then flattened and mapped to 10 output scores.
model = nn.Sequential(
    nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=1),  # bs x 8 x 14 x 14
    nn.ReLU(),
    nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1),  # bs x 16 x 7 x 7
    nn.ReLU(),
    nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # bs x 32 x 4 x 4
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  # bs x 64 x 2 x 2
    nn.ReLU(),
    nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),  # bs x 64 x 1 x 1
    nn.AdaptiveAvgPool2d(output_size=1),
    Lambda(flatten),
    nn.Linear(in_features=64, out_features=10),
)

opt = SGD(model.parameters(), lr=lr)

In [10]:
# Use the GPU when one is available; otherwise run on the CPU so the
# notebook still executes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
for i in range(EPOCHS):
    for xb, yb in train_dl:
        # Batches come out of the loader on the CPU; move them next to the
        # model before the forward pass.
        out = model(xb.to(device))
        loss = loss_func(out, yb.to(device))
        loss.backward()
        opt.step()
        # Clear the gradients so they do not accumulate into the next batch.
        opt.zero_grad()
    # Last mini-batch loss of the epoch (not an epoch average).
    print(loss.item())

0.25031137466430664
0.007354259490966797
0.35215529799461365
0.016548097133636475


1:13: W291 trailing whitespace
4:31: W291 trailing whitespace
5:41: W291 trailing whitespace


0.011879026889801025
