In [51]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [52]:
# GPU smoke test: draw five standard-normal samples and move them to the CUDA device.
torch.randn(5).to("cuda")

tensor([ 1.0224, -0.2315,  0.9420,  0.3910, -2.6430], device='cuda:0')

In [48]:
# Baseline model: a plain feed-forward stack taking 784 (= 28 * 28) input
# features and producing 10 outputs, with light dropout before the last layer.
# NOTE: the name `model` is rebound to a ResNet instance further down.
model = nn.Sequential(
    nn.Linear(in_features=28 * 28, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=64),
    nn.ReLU(),
    nn.Dropout(p=0.1),
    nn.Linear(in_features=64, out_features=10),
)

In [53]:
# Fancier model: an MLP with a residual (skip) connection

class ResNet(nn.Module):
    """Small fully connected network with one residual (skip) connection.

    With ``h1 = relu(l1(x))`` and ``h2 = relu(l2(h1))``, the output is
    ``l3(dropout(h2 + h1))``. The default arguments reproduce the original
    fixed architecture (784 -> 64 -> 64 -> 10, dropout 0.1).

    Args:
        in_features: Size of the last dimension of the input.
        hidden_features: Width of both hidden layers. They share one width
            because their activations are added together.
        num_classes: Number of output logits.
        dropout: Dropout probability applied to the residual sum.
    """

    def __init__(self, in_features=28 * 28, hidden_features=64, num_classes=10, dropout=0.1):
        super().__init__()
        # Attribute names and creation order are kept so that existing
        # state dicts and seeded initialisation stay compatible.
        self.l1 = nn.Linear(in_features, hidden_features)
        self.l2 = nn.Linear(hidden_features, hidden_features)
        self.l3 = nn.Linear(hidden_features, num_classes)
        self.do = nn.Dropout(dropout)

    def forward(self, x):
        """Return raw (unnormalised) logits for ``x``.

        Args:
            x: Tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor of logits whose last dimension has size ``num_classes``.
        """
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        # Residual connection: add the first hidden activation back in
        # before dropout and the output layer.
        do = self.do(h2 + h1)
        logits = self.l3(do)
        return logits
# Build the residual network, then move its parameters onto the GPU.
model = ResNet()
model = model.cuda()

In [54]:
# Objective: cross-entropy, applied to the model's raw logits.
loss = nn.CrossEntropyLoss()

# Optimiser: plain stochastic gradient descent over all of the model's parameters.
optimiser = optim.SGD(params=model.parameters(), lr=0.01)

In [56]:
# Train / validation split
# MNIST training split (downloaded into ./data when missing); ToTensor turns
# each image into a tensor.
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Hold out 5,000 of the 60,000 samples for validation. The seeded generator
# keeps the split identical across kernel restarts, so validation numbers
# from different runs are comparable.
train, val = random_split(
    train_data,
    [55000, 5000],
    generator=torch.Generator().manual_seed(42),
)

# Reshuffle the training samples every epoch so SGD does not see the same
# batches in the same order each time. The validation loader is only used
# for evaluation, so it keeps a fixed order.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32)

In [57]:
# Training and validation loops
nb_epochs = 5

# Follow the model's own device instead of hard-coding CUDA, so this cell
# works wherever the model's parameters were placed.
device = next(model.parameters()).device


def _weighted_mean(values, weights):
    """Average per-batch `values`, weighting each entry by its batch size.

    Each recorded value is already a mean over one batch, so a plain mean of
    those values over-weights a smaller final batch. Weighting by batch size
    gives the true per-sample average for the epoch.

    Args:
        values: List of per-batch scalars (Python floats or 0-d tensors).
        weights: List of the matching batch sizes.

    Returns:
        0-d tensor holding the weighted mean (NaN if both lists are empty).
    """
    w = torch.tensor(weights, dtype=torch.float)
    return (torch.tensor(values) * w).sum() / w.sum()


for epoch in range(nb_epochs):
    # ---- training pass ----
    losses = list()
    accuracies = list()
    batch_sizes = list()
    model.train()  # enables dropout
    for batch in train_loader:
        x, y = batch

        # x: b x 1 x 28 x 28  ->  b x 784
        b = x.size(0)
        x = x.view(b, -1).to(device)

        #1 forward pass
        l = model(x)  # l: logits

        #2 objective function
        J = loss(l, y.to(device))

        #3 clear the gradients left over from the previous step
        model.zero_grad()

        #4 accumulate the partial derivatives of J w.r.t. the parameters
        J.backward()

        #5 step in the opposite direction of the gradient
        optimiser.step()
        # Manual equivalent of the step above, for every parameter p:
        #   with torch.no_grad(): p -= eta * p.grad

        losses.append(J.item())
        accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())
        batch_sizes.append(b)

    print(f'Epoch {epoch + 1}, train loss: {_weighted_mean(losses, batch_sizes):.2f}, train acc: {_weighted_mean(accuracies, batch_sizes):.2f}')

    # ---- validation pass ----
    losses = list()
    accuracies = list()
    batch_sizes = list()
    model.eval()  # disables dropout

    for batch in val_loader:
        x, y = batch

        # x: b x 1 x 28 x 28  ->  b x 784
        b = x.size(0)
        x = x.view(b, -1).to(device)

        # No gradients are needed for evaluation.
        with torch.no_grad():
            #1 forward pass
            l = model(x)  # logits

            #2 objective function
            J = loss(l, y.to(device))

        losses.append(J.item())
        accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())
        batch_sizes.append(b)

    # This line reports validation metrics; it was previously labelled "train acc".
    print(f'Epoch {epoch + 1}, validation loss: {_weighted_mean(losses, batch_sizes):.3f}, validation acc: {_weighted_mean(accuracies, batch_sizes):.3f}')


Epoch 1, train loss: 0.86, train acc: 0.78
Epoch 1, validation loss: 0.381, train acc: 0.888
Epoch 2, train loss: 0.38, train acc: 0.89
Epoch 2, validation loss: 0.300, train acc: 0.905
Epoch 3, train loss: 0.31, train acc: 0.91
Epoch 3, validation loss: 0.259, train acc: 0.917
Epoch 4, train loss: 0.27, train acc: 0.92
Epoch 4, validation loss: 0.228, train acc: 0.929
Epoch 5, train loss: 0.24, train acc: 0.93
Epoch 5, validation loss: 0.206, train acc: 0.936
