# MNIST: a simple introduction to coding with PyTorch
Note that if a large enough model is used (approximately [784 x 128 x 1024 x 1024 x 128 x 10] in the case of MNIST), <code>.cuda()</code> should be used. Append it to the model declaration and to the data (both the labels and the input data) every time a batch is fetched.
## Notes
1. Need to understand how the data is sent to the model
2. Need to know how to evaluate some data
    - Here I could start feeding it noise
    - Make sure I can see all the outputs (scalar values in the last length 10 vector)

In [None]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader
import time

In [None]:
# Build the neural network: a stack of fully connected layers that maps a
# 784-element (28 * 28) input vector to 10 outputs through three hidden layers
# of 64 units, with a ReLU after every layer except the last one.
layer_widths = [28 * 28, 64, 64, 64, 10]
layers = []
for n_in, n_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(n_in, n_out))
    layers.append(nn.ReLU())
layers.pop()  # drop the trailing ReLU: the output layer stays linear
model = nn.Sequential(*layers)

In [None]:
# Set up the optimizer: stochastic gradient descent (SGD) over all of the
# model's parameters, with a learning rate (lr) of 0.01.
params = model.parameters()
optimizer = optim.SGD(params, lr=1e-2)

In [None]:
# Cost function / loss function: cross-entropy, averaged over the batch
# ("mean" is the library default, spelled out here for clarity).
loss = nn.CrossEntropyLoss(reduction="mean")

In [None]:
# Load the MNIST training set (images converted to tensors) and hold out part
# of it for validation.
# The files are expected to already be in the "data" directory; to download
# them, change "download" to "True".
train_data = datasets.MNIST('data', train=True, download=False, transform=transforms.ToTensor())

# Random split: 55000 samples for training, 5000 for validation.
train, val = random_split(train_data, [55000, 5000])

# shuffle=True reshuffles the training samples at every epoch, so SGD does not
# see the mini-batches in the same order each time. The validation loader is
# only used to compute a loss, so its order does not matter.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32)

In [None]:
# Training and validation loop: for every epoch, run one optimisation pass
# over the training loader, then measure the loss on the validation loader.
# perf_counter() is a monotonic clock intended for measuring elapsed time
# (time.time() can jump if the system clock is adjusted).
t1 = time.perf_counter()

n_epochs = 5  # how many times we loop through the data set
for epoch in range(n_epochs):
    # training loop
    # Put the model in training mode. This is a no-op for plain Linear/ReLU
    # layers, but required as soon as dropout or batch norm is added.
    model.train()
    losses = []
    for x, y in train_loader:
        # x: b x 1 x 28 x 28 --> one flat vector per image (b x 784)
        # (a colour image would have been 3 x 28 x 28)
        b = x.size(0)
        x = x.view(b, -1)

        # step 1 (forward)
        logits = model(x)

        # step 2 (compute objective function)
        J = loss(logits, y)  # y is label

        # step 3 (cleaning the gradient left over from the previous batch)
        model.zero_grad()

        # step 4 (accumulate the partial derivatives of J wrt the parameters)
        J.backward()

        # step 5 (step in descent direction)
        optimizer.step()

        losses.append(J.item())

    # Reported value is the mean of the per-batch losses.
    print(f"Epoch {epoch + 1}, train loss: {torch.tensor(losses).mean():.2f}")

    # validation loop
    # Evaluation mode, and no gradient tracking for the whole pass: nothing
    # is optimised here, we only measure the loss.
    model.eval()
    losses = []
    with torch.no_grad():
        for x, y in val_loader:
            # x: b x 1 x 28 x 28 --> one flat vector per image (b x 784)
            b = x.size(0)
            x = x.view(b, -1)

            # step 1 (forward)
            logits = model(x)

            # step 2 (compute objective function)
            J = loss(logits, y)  # y is label

            losses.append(J.item())

    print(f"Epoch {epoch + 1}, validation loss: {torch.tensor(losses).mean():.2f}")
t2 = time.perf_counter()
print(f"Took {t2-t1:.2f} seconds to compute")