In [7]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()

In [8]:
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [9]:
# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()

In [10]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

Inside the training loop, optimization happens in three steps:
1. Call optimizer.zero_grad() to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.
2. Backpropagate the prediction loss with a call to loss.backwards(). PyTorch deposits the gradients of the loss w.r.t. each parameter.
3. Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass.

In [11]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
            
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [12]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n----------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
----------------------------
loss: 2.302375 [    0/60000]
loss: 2.294302 [ 6400/60000]
loss: 2.287977 [12800/60000]
loss: 2.288705 [19200/60000]
loss: 2.277148 [25600/60000]
loss: 2.269979 [32000/60000]
loss: 2.265955 [38400/60000]
loss: 2.255384 [44800/60000]
loss: 2.260300 [51200/60000]
loss: 2.243234 [57600/60000]
Test Error: 
 Accuracy: 33.6%, Avg loss: 0.035353 

Epoch 2
----------------------------
loss: 2.243720 [    0/60000]
loss: 2.233844 [ 6400/60000]
loss: 2.216624 [12800/60000]
loss: 2.243620 [19200/60000]
loss: 2.207286 [25600/60000]
loss: 2.197546 [32000/60000]
loss: 2.194533 [38400/60000]
loss: 2.176546 [44800/60000]
loss: 2.191401 [51200/60000]
loss: 2.152912 [57600/60000]
Test Error: 
 Accuracy: 33.3%, Avg loss: 0.033982 

Epoch 3
----------------------------
loss: 2.157923 [    0/60000]
loss: 2.130431 [ 6400/60000]
loss: 2.090297 [12800/60000]
loss: 2.128157 [19200/60000]
loss: 2.060083 [25600/60000]
loss: 2.077972 [32000/60000]
loss: 2.048084 [38400/60000]
lo