In [15]:
import torch, torchvision
from torchvision import transforms
import matplotlib.pyplot as plt

from torch import nn, optim

In [10]:
# See the previous notebooks for details on the dataset pipeline.
# ToTensor is the only preprocessing step applied to each sample.
transform = transforms.Compose([transforms.ToTensor()])
trainset = torchvision.datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)
# Shuffled mini-batches of 64 samples.
trainloader = torch.utils.data.DataLoader(
    trainset,
    shuffle=True,
    batch_size=64,
)

In [4]:
class Net(nn.Module):
    """Single-hidden-layer classifier that returns log-probabilities.

    The architecture is ``Linear -> Sigmoid -> Linear -> LogSoftmax``. The
    defaults reproduce the MNIST setup used in this notebook (28*28 flattened
    pixels, 512 hidden units, 10 classes), but every size can be overridden.

    Args:
        in_features: Length of each flattened input sample.
        hidden_features: Number of units in the hidden layer.
        num_classes: Number of output classes.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # Hidden layer: projects the flattened input onto the hidden units.
        self.hidden = nn.Linear(in_features, hidden_features)
        # Output layer: its input width must match the hidden layer's output
        # width; it emits one score per class.
        self.output = nn.Linear(hidden_features, num_classes)

        # Sigmoid is the activation function applied after the hidden layer.
        self.sigmoid = nn.Sigmoid()

        # LogSoftmax over dim=1 (the class axis of a (batch, classes) tensor)
        # produces log-probabilities rather than probabilities. The attribute
        # keeps the name ``softmax`` so existing references still work.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of flattened inputs.

        Args:
            x: Tensor of shape ``(batch, in_features)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``; ``exp`` of each row
            sums to 1.
        """
        # hidden layer -> sigmoid -> output layer -> log-softmax
        hidden_activation = self.sigmoid(self.hidden(x))
        return self.softmax(self.output(hidden_activation))

In [5]:
# Instantiate the network with its default layer sizes
model = Net()

In [6]:
# Bare expression: the notebook renders the module's repr, listing its layers
model

Net(
  (hidden): Linear(in_features=784, out_features=512, bias=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (softmax): LogSoftmax(dim=1)
)

In [8]:
# Loss function, named `criterion` by PyTorch convention.
# NLLLoss takes log-probabilities as input, which matches the model's LogSoftmax output.
criterion = nn.NLLLoss()

In [11]:
# Pull a single mini-batch of images and their labels from the training loader
images, labels = next(iter(trainloader))

In [13]:
# Flatten each image: keep the batch dimension; -1 tells view() to infer the remaining size
images = images.view(images.shape[0], -1)
# The model ends in LogSoftmax, so despite the name `logits` this holds log-probabilities
logits = model(images)
# Negative log-likelihood loss between the predicted log-probabilities and the true labels
loss = criterion(logits, labels)
# Display the loss; its grad_fn shows it is attached to the computational graph
loss

tensor(2.3321, grad_fn=<NllLossBackward0>)

In [14]:
# Backward pass: autograd walks the computational graph back from the loss
# and computes the gradients for the model parameters
loss.backward()

In [16]:
# Weight updates are delegated to an optimizer: plain SGD over all model parameters, lr=0.01
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [17]:
# Apply one SGD update using the gradients computed by loss.backward().
# NOTE(review): PyTorch accumulates gradients across backward() calls, so a
# training loop should call optimizer.zero_grad() before each backward pass.
optimizer.step()

In [18]:
# we have finished one training step on a single mini-batch (not a full epoch):
# forward prop
# -> calculate loss
# -> backward prop to get gradients
# -> use gradients to update weights