Load the MNIST training data first

In [3]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from collections import OrderedDict

# Preprocessing applied to every image as it is loaded.
# MNIST images are single-channel (grayscale), so Normalize takes exactly one
# mean and one std. A 3-tuple (as used for RGB data) fails on recent
# torchvision releases because a 3-element mean cannot be subtracted in place
# from a 1-channel tensor.
# ToTensor scales pixels to [0, 1]; (x - 0.5) / 0.5 then maps them to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Fetch the training split (downloaded on first use) and serve it in
# shuffled mini-batches of 64 images.
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

Define the model

In [7]:
# Fully connected classifier: 784 inputs -> 256 -> 64 -> 10 log-probabilities.
# Layers are registered by name, in forward order.
model = nn.Sequential()
model.add_module('hidden1', nn.Linear(784, 256))
model.add_module('Relu1', nn.ReLU())
model.add_module('hidden2', nn.Linear(256, 64))
model.add_module('Relu2', nn.ReLU())
model.add_module('output', nn.Linear(64, 10))
# dim=1 normalizes across the 10 class scores of each row in the batch.
model.add_module('log_softmax', nn.LogSoftmax(dim=1))

model

Sequential(
  (hidden1): Linear(in_features=784, out_features=256, bias=True)
  (Relu1): ReLU()
  (hidden2): Linear(in_features=256, out_features=64, bias=True)
  (Relu2): ReLU()
  (output): Linear(in_features=64, out_features=10, bias=True)
  (log_softmax): LogSoftmax()
)

Set up the loss function

In [20]:
# Negative log-likelihood loss. It expects log-probabilities as input, which
# is what the model's final LogSoftmax layer produces.
criterion = nn.NLLLoss()

# Pull a single mini-batch from the loader.
images, labels = next(iter(trainloader))

# --- Loss for one image ---
# Keep only the first image and flatten it into a single row of features.
out = model(images[:1].view(1, -1))
# Exponentiating log-probabilities recovers the class probabilities.
probabilities = out.exp()
print(probabilities)

# The target must be a 1-element tensor to match the batch of one.
print(criterion(out, labels[:1]))

# --- Loss for the whole batch ---
# Flatten every image to one row: (batch size, 784).
# NOTE: despite its name, `logits` holds log-probabilities, because the model
# ends in LogSoftmax.
logits = model(images.view(len(images), -1))

loss = criterion(logits, labels)

print(loss)

tensor([[0.0863, 0.0893, 0.0845, 0.0690, 0.1152, 0.1195, 0.1027, 0.1129, 0.1217,
         0.0989]], grad_fn=<ExpBackward>)
tensor(2.4709, grad_fn=<NllLossBackward>)
tensor(2.3353, grad_fn=<NllLossBackward>)
