Reference: https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/feedforward_neural_network/main.py#L37-L49

Colab example: https://colab.research.google.com/github/omarsar/pytorch_notebooks/blob/master/pytorch_quick_start.ipynb

In [None]:
!pip3 install torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
print(torch.__version__)

In [None]:
BATCH_SIZE = 32

## preprocessing pipeline: ToTensor is the only step applied to each sample
transform = transforms.Compose([
    transforms.ToTensor(),
])

## training split: downloaded into ./data if missing, reshuffled on every pass
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

## test split: same preprocessing, iterated in a fixed order
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

## Pull a single batch from the training loader and display its first six samples.
dataiter = iter(trainloader)
# Use the built-in next(): iterators are only required to implement the
# __next__ protocol, so calling a `.next()` method on them is not portable.
images, labels = next(dataiter)

for i in range(6):
    # Print the label, then draw channel 0 of the matching image.
    print(labels[i].numpy())
    plt.imshow(images[i, 0].numpy())
    plt.show()
    print('')

In [None]:
class FFNet(nn.Module):
    """Fully connected feed-forward network with ReLU-activated hidden layers.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_sizes: Sequence with the width of each hidden layer, in order.
            May be empty, in which case the network is a single linear map
            from ``input_size`` to ``output_size``.
        output_size: Number of output units (no activation is applied to them).
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        sizes = [input_size, *hidden_sizes]
        # The layers must live in an nn.ModuleList rather than a plain list:
        # only registered submodules are visible to parameters(), to(device)
        # and state_dict(), so a plain list would leave them untrained.
        self.hidden = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]
        )
        self.relu = nn.ReLU()
        # sizes[-1] is the last hidden width, or input_size when there are no
        # hidden layers (hidden_sizes[-1] would raise IndexError in that case).
        self.output = nn.Linear(sizes[-1], output_size)

    def forward(self, x):
        """Flatten every dimension after the first and return the output layer's values."""
        x = x.flatten(start_dim=1)
        for layer in self.hidden:
            x = self.relu(layer(x))
        return self.output(x)

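Cross-entropy loss

$$
H(y, p) = -\sum_i y_i \log p_i
$$

https://en.wikipedia.org/wiki/Cross_entropy

For a fixed target distribution $y$, the loss is minimized when $p = y$ (equivalently, $\sum_i y_i \log p_i$ is maximized).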

In [None]:
# Loss

def select_indices(x, indices):
    """Pick one entry from each row of ``x``.

    For row ``r`` the selected entry is ``x[r, indices[r]]``. The index tensor
    gets an extra dimension at position 1 before gathering, so the result
    keeps that dimension (one column per row for a 1-D ``indices``).
    """
    column = indices.unsqueeze(1)
    return x.gather(dim=1, index=column)

def mle_loss(x, labels):
    """Mean negative log-likelihood of ``labels`` under ``softmax(x, dim=1)``.

    This is the cross-entropy loss for targets that put probability 1 on the
    correct label:
    https://ml-cheatsheet.readthedocs.io/en/latest/loss_functions.html#cross-entropy
    """
    # log_softmax turns the raw scores into log-probabilities along dim 1.
    log_probs = F.log_softmax(x, dim=1)
    # Keep only the log-probability assigned to each sample's own label.
    picked = select_indices(log_probs, labels)
    return -picked.mean()

In [None]:
## smoke test: push one training batch through a freshly built model
model = FFNet(input_size=28 * 28, hidden_sizes=[200, 100], output_size=10)

images, labels = next(iter(trainloader))
print("batch size:", images.shape)

out = model(images)
print(out.shape)

## loss of the untrained model on that batch (shown as the cell's value)
mle_loss(out, labels)

In [None]:
def accuracy(logit, target, batch_size):
    """Return the fraction of samples whose highest-scoring class equals the target.

    Args:
        logit: Score tensor; the predicted class is the argmax over dim 1.
        target: Tensor of class indices; predictions are reshaped to its shape
            before comparison.
        batch_size: Positive number used as the denominator.

    Returns:
        A Python float: number of correct predictions divided by ``batch_size``.
    """
    predicted = torch.argmax(logit, dim=1).view_as(target)
    # Compare the tensors directly instead of going through `.data`, and
    # divide as plain Python numbers so the result is a true quotient
    # regardless of how the tensor `/` operator treats integer dtypes.
    num_correct = (predicted == target).sum().item()
    return num_correct / batch_size

In [None]:
learning_rate = 0.001
num_epochs = 5

## use the first CUDA device when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


for epoch in range(num_epochs):
    train_running_loss = 0.0
    train_acc = 0.0
    # Count batches explicitly: the last enumerate() index is one less than
    # the number of batches, so dividing by it inflates the averages.
    num_batches = 0

    model.train()

    ## training step
    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        ## forward + loss + backprop
        logits = model(images)
        loss = mle_loss(logits, labels)
        optimizer.zero_grad()
        loss.backward()

        ## update model params
        optimizer.step()

        train_running_loss += loss.item()
        # Normalise by the real size of this batch; the final batch of an
        # epoch can hold fewer than BATCH_SIZE samples.
        train_acc += accuracy(logits, labels, labels.size(0))
        num_batches += 1

    model.eval()
    # Avoid a division by zero if the loader produced no batches.
    denom = max(num_batches, 1)
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f' \
          %(epoch, train_running_loss / denom, train_acc / denom))