In [20]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# MNIST images are single-channel (grayscale; each one is later flattened to
# 28*28 = 784 values), so Normalize must receive exactly one mean and one std.
# Supplying three values per statistic fails on torchvision versions that
# broadcast mean/std against the image tensor.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Download the MNIST training split if it is not cached yet and serve it in
# shuffled mini-batches of 64 images.
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [21]:
# Feed-forward classifier: 784 inputs -> 128 -> 64 -> 10 raw class scores.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)
# The final layer is a plain Linear, so CrossEntropyLoss receives raw scores.
criterion = nn.CrossEntropyLoss()

# Grab one batch and flatten every image into a single row.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

# Forward pass followed by the loss for this batch.
logits = model(images)
loss = criterion(logits, labels)
print(loss)

tensor(2.2972, grad_fn=<NllLossBackward>)


In [22]:
# Same network, but ending in LogSoftmax so it outputs log-probabilities,
# paired with the negative log-likelihood loss.

model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)
criterion = nn.NLLLoss()

# Grab one batch and flatten every image into a single row.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

# Forward pass yields log-probabilities; NLLLoss turns them into the loss.
logps = model(images)
loss = criterion(logps, labels)
print(loss)

tensor(2.2886, grad_fn=<NllLossBackward>)


In [23]:
# Autograd demonstration (the mechanism used for backpropagation): x is created
# with requires_grad=True, so the operation that builds y from it is recorded.

x = torch.randn((2, 2), requires_grad=True)
print(x)

# Element-wise square of x.
y = x.pow(2)
print(y)


tensor([[ 0.0131,  0.9596],
        [ 0.4929, -0.4052]], requires_grad=True)
tensor([[1.7173e-04, 9.2076e-01],
        [2.4291e-01, 1.6418e-01]], grad_fn=<PowBackward0>)


In [24]:
# Show the autograd function object attached to y (y was computed as x**2).
print(y.grad_fn)

<PowBackward0 object at 0x0000014D29C71E48>


In [25]:
# Collapse y into a single scalar by averaging all of its elements.
z = torch.mean(y)
print(z)

tensor(0.3320, grad_fn=<MeanBackward1>)


In [26]:
# z.backward() has not been called yet at this point, so x.grad prints as None.
print(x.grad)

None


In [27]:
# Backpropagate from the scalar z; afterwards x.grad is populated.
z.backward()

In [28]:
# After z.backward(), x.grad holds the gradient of z with respect to x.
print(x.grad)

tensor([[ 0.0066,  0.4798],
        [ 0.2464, -0.2026]])


In [29]:
# Analytic check: z is the mean of x**2 over the 4 elements of x, so
# dz/dx = 2*x/4 = x/2. The values printed here should match x.grad.
print(x/2)

tensor([[ 0.0066,  0.4798],
        [ 0.2464, -0.2026]], grad_fn=<DivBackward0>)


In [30]:
## Back to the main problem: using autograd together with the loss function.
model = nn.Sequential(
    nn.Linear(784, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
    nn.Linear(64, 10), nn.LogSoftmax(dim=1),
)

criterion = nn.NLLLoss()

# One batch, with every image flattened into a single row.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

# Forward pass and loss; the backward pass is run in a later cell.
logps = model(images)
loss = criterion(logps, labels)

In [31]:
# The first layer's weight gradient is None until a backward pass has run.
print('Before backward pass: \n', model[0].weight.grad)

# Backpropagate the loss through the model to compute parameter gradients.
loss.backward()

# The same attribute now holds a gradient tensor.
print('After backward pass: \n', model[0].weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[ 0.0006,  0.0006,  0.0006,  ...,  0.0006,  0.0006,  0.0006],
        [-0.0009, -0.0009, -0.0009,  ..., -0.0009, -0.0009, -0.0009],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [-0.0046, -0.0046, -0.0046,  ..., -0.0046, -0.0046, -0.0046],
        [ 0.0007,  0.0007,  0.0007,  ...,  0.0007,  0.0007,  0.0007],
        [-0.0002, -0.0002, -0.0002,  ..., -0.0002, -0.0002, -0.0002]])


In [32]:
from torch import optim


# SGD optimizer over all of the model's parameters with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [33]:
# Show the first layer's weights before any optimizer update.
print('Initial weights - ', model[0].weight)

images, labels = next(iter(trainloader))
# Flatten each image into one row. The batch size is taken from the tensor
# itself instead of being hard-coded: an in-place resize_ to a fixed (64, 784)
# would silently reshape or pad a batch of any other size.
images = images.view(images.shape[0], -1)

# Clear gradients left over from earlier backward passes before computing new ones.
optimizer.zero_grad()

# Forward pass, loss, and backward pass for this single batch.
output = model(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient -', model[0].weight.grad)

Initial weights -  Parameter containing:
tensor([[ 0.0014,  0.0174, -0.0262,  ..., -0.0229,  0.0034, -0.0035],
        [ 0.0039,  0.0273,  0.0225,  ..., -0.0119, -0.0236, -0.0137],
        [ 0.0023, -0.0046,  0.0025,  ..., -0.0102, -0.0102,  0.0121],
        ...,
        [ 0.0024,  0.0224, -0.0145,  ..., -0.0077,  0.0317,  0.0152],
        [-0.0068, -0.0286, -0.0078,  ...,  0.0205, -0.0091, -0.0271],
        [-0.0213, -0.0109,  0.0202,  ...,  0.0353, -0.0085, -0.0178]],
       requires_grad=True)
Gradient - tensor([[ 0.0023,  0.0023,  0.0023,  ...,  0.0023,  0.0023,  0.0023],
        [ 0.0042,  0.0042,  0.0042,  ...,  0.0042,  0.0042,  0.0042],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [-0.0034, -0.0034, -0.0034,  ..., -0.0034, -0.0034, -0.0034],
        [-0.0009, -0.0009, -0.0009,  ..., -0.0009, -0.0009, -0.0009],
        [-0.0007, -0.0007, -0.0007,  ..., -0.0007, -0.0007, -0.0007]])


In [34]:
# Apply one optimizer update using the gradients currently stored on the
# parameters, then show the first layer's weights after the step.
optimizer.step()
print('Updated weights - ', model[0].weight)

Updated weights -  Parameter containing:
tensor([[ 0.0013,  0.0174, -0.0262,  ..., -0.0229,  0.0034, -0.0035],
        [ 0.0039,  0.0273,  0.0225,  ..., -0.0120, -0.0236, -0.0137],
        [ 0.0023, -0.0046,  0.0025,  ..., -0.0102, -0.0102,  0.0121],
        ...,
        [ 0.0024,  0.0224, -0.0145,  ..., -0.0077,  0.0318,  0.0152],
        [-0.0068, -0.0286, -0.0078,  ...,  0.0205, -0.0091, -0.0270],
        [-0.0213, -0.0109,  0.0202,  ...,  0.0353, -0.0085, -0.0178]],
       requires_grad=True)


In [35]:
## Train the same architecture in a loop over several epochs.

model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5
for e in range(epochs):
    # Sum of the per-batch losses for the current epoch.
    running_loss = 0
    for images, labels in trainloader:
        # Flatten each image in the batch into a single row.
        images = images.view(images.shape[0], -1)

        # Clear gradients from the previous batch before computing new ones.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss once the epoch's batches are done.
    # (The inner loop never breaks, so a `for ... else` clause is not needed.)
    print(f"Training loss: {running_loss/len(trainloader)}")

Training loss: 1.8483884084199282
Training loss: 0.8130027138030351
Training loss: 0.5147685151872858
Training loss: 0.4228172221226987
Training loss: 0.37992673149622325


In [None]:
%matplotlib inline
import helper

# Take one batch and use its first image for a single-sample prediction.
images, labels = next(iter(trainloader))
img = images[0].view(1, -1)

# Inference only: run the forward pass without gradient tracking.
with torch.no_grad():
    logps = model(img)

# The model ends in LogSoftmax, so exponentiating gives class probabilities.
ps = logps.exp()
helper.view_classify(img.view(1, 28, 28), ps)