In [1]:
#Import packages
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms

In [2]:
#Load the MNIST training split as normalized tensors
#Normalize with mean 0.5 / std 0.5 is applied to the single image channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

#download=True fetches the data into the root folder if it is not there yet
trainset = datasets.MNIST(
    root='~/.pytorch/MNIST_data',
    train=True,
    download=True,
    transform=transform,
)
#Serve the training set in shuffled mini-batches of 64 samples
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)

In [4]:
#Pull one mini-batch from the loader and inspect the shape of its images
batch_iterator = iter(trainloader)
images, labels = next(batch_iterator)
print(images.shape)

torch.Size([64, 1, 28, 28])


In [10]:
#Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 outputs, ReLU in between
layers = [
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
]
model = nn.Sequential(*layers)

#Cross-entropy is applied directly to the raw scores of the last Linear layer
criterion = nn.CrossEntropyLoss()

#Collapse every image into one row per sample, keeping the batch dimension
batch_size = images.shape[0]
images = images.view(batch_size, -1)
print(images.shape)

#Forward pass through the network, then score the output against the labels
logits = model(images)
loss = criterion(logits, labels)

print(loss)

torch.Size([64, 784])
tensor(2.3030, grad_fn=<NllLossBackward>)


In [16]:
#Building the network with LogSoftmax
#The final LogSoftmax over dim=1 makes the model output log-probabilities
#per class instead of raw scores
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

#Define the loss
#NLLLoss takes log-probabilities, which is what the LogSoftmax layer produces
criterion = nn.NLLLoss()

#Flatten the image
#Leaves an already flattened batch unchanged, so re-running the cell is safe
images = images.view(images.shape[0], -1)
print(images.shape)

#Perform a forward pass
logps = model(images)
#Score the model output `logps`; the name `log` used here before was never
#defined and raised a NameError
loss = criterion(logps, labels)

print(loss)

torch.Size([64, 784])
tensor(2.3297, grad_fn=<NllLossBackward>)


In [17]:
#Using autograd to calculate gradient
#Backpropagates from `loss` (computed in the previous cell); the gradients
#end up in the `.grad` attribute of the parameters that produced it.
#NOTE: re-running this cell without recomputing `loss` is expected to fail,
#since the graph is not retained after the first backward pass
loss.backward()

In [19]:
#Creating an optimizer
#Plain SGD over every parameter of `model`, learning rate 0.01
#(`optim` is imported once in the notebook's import cell)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [3]:
#Training the network
#A fresh model is built here so training starts from newly initialized weights
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

#Creating an optimizer
optimizer = optim.SGD(model.parameters(), lr=0.003)

#Creating a loss function
#NLLLoss pairs with the LogSoftmax output layer of the model
criterion = nn.NLLLoss()

#Training the model
epochs = 5
for i in range(epochs):
    #Sum of the per-batch losses seen in this epoch
    running_loss = 0
    print("Epoch : ", i+1, " ...")
    for images, labels in trainloader:
        #Flatten the image
        images = images.view(images.shape[0], -1)

        #Gradients accumulate across backward() calls, so clear them each step
        optimizer.zero_grad()

        #The model ends in LogSoftmax, so its output is log-probabilities
        logps = model(images)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    #Average loss per batch for this epoch
    print(running_loss/len(trainloader))
    

Epoch :  1  ...
1.8684164190343193
Epoch :  2  ...
0.8248680303536499
Epoch :  3  ...
0.5045409426887406
Epoch :  4  ...
0.4181114574040431
Epoch :  5  ...
0.3775887156822788


In [74]:
#Spot-check the trained model on the first sample of a fresh batch
batch_iterator = iter(trainloader)
images, labels = next(batch_iterator)
#Flatten that one image into a single row of 784 values
img = images[0].view(1, 784)

#Inference only, so gradient tracking is switched off
with torch.no_grad():
    logps = model(img)

#The model outputs log-probabilities; exponentiate to get probabilities
ps = torch.exp(logps)
predicted, actual = ps.argmax(), labels[0]
print("Prediction", predicted)
print("Actual", actual)

Prediction tensor(1)
Actual tensor(1)
