# Losses in PyTorch

In [1]:
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torchvision import datasets, transforms

In [2]:
# Preprocessing pipeline applied to every MNIST image.
# `transforms.Scale` is the deprecated (and since removed) alias of
# `transforms.Resize`, and its second positional argument is the interpolation
# mode rather than the height, so the target size is given as one (h, w) tuple.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),               # image -> float tensor
    transforms.Normalize([0.5], [0.5]),  # (x - 0.5) / 0.5 per channel
])

# Training split of MNIST (downloaded into MNIST_data/ when requested),
# served in shuffled mini-batches of 64 images.
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)



In [4]:
# Feed-forward classifier: 784 inputs -> 128 -> 64 -> 10 outputs.
# The last layer is a plain Linear, so the network emits raw scores (logits).
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

# Loss function applied directly to the logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Pull a single mini-batch out of the loader.
images, labels = next(iter(trainloader))

# Collapse every image into one row: (batch, ...) -> (batch, features).
images = images.view(len(images), -1)

# Forward pass, then compare the logits with the true labels.
logits = model(images)
loss = criterion(logits, labels)

print(loss)

tensor(2.3020, grad_fn=<NllLossBackward>)


In [6]:
# Same architecture as before, but finished with LogSoftmax over the class
# dimension, so the network now outputs log-probabilities instead of raw scores.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)

# Negative log-likelihood loss, paired with the LogSoftmax output above.
criterion = nn.NLLLoss()

# Pull a single mini-batch out of the loader.
images, labels = next(iter(trainloader))

# Collapse every image into one row: (batch, ...) -> (batch, features).
images = images.view(len(images), -1)

# Forward pass. The variable keeps the name `logits`, but because of the final
# LogSoftmax layer it actually holds log-probabilities.
logits = model(images)
loss = criterion(logits, labels)

print(loss)

tensor(2.3087, grad_fn=<NllLossBackward>)


# Autograd

In [7]:
# 2x2 tensor of standard-normal samples; requires_grad=True asks autograd to
# record the operations applied to it.
x = torch.randn((2, 2), requires_grad=True)
print(x)

tensor([[-2.6108,  0.0866],
        [-0.3569, -0.7455]], requires_grad=True)


In [8]:
# Element-wise square of x; the result records the power operation as its grad_fn.
y = x.pow(2)
print(y)

tensor([[6.8164, 0.0075],
        [0.1274, 0.5558]], grad_fn=<PowBackward0>)


In [9]:
 # grad_fn is the autograd node recorded for the operation that created y (the power op from x**2).
 print(y.grad_fn)

<PowBackward0 object at 0x7fc82bded048>


In [10]:
# Reduce y to a single scalar by averaging all of its elements.
z = torch.mean(y)
print(z)

tensor(1.8768, grad_fn=<MeanBackward0>)


In [11]:
# No backward pass has been run on z in the cells above, so x has no gradient
# stored yet (the recorded output is None).
print(x.grad)

None


In [14]:
# Backpropagate from the scalar z; this populates x.grad with dz/dx.
z.backward()
print(x.grad)
# Analytic check: z = mean(x**2) over the 4 elements of x, so dz/dx = 2*x/4 = x/2.
print(x/2)

tensor([[-1.3054,  0.0433],
        [-0.1784, -0.3728]])
tensor([[-1.3054,  0.0433],
        [-0.1784, -0.3728]], grad_fn=<DivBackward0>)


# Loss and Autograd together

In [15]:
# Classifier ending in LogSoftmax, so its outputs are log-probabilities.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)

# Negative log-likelihood loss to pair with the log-probability output.
criterion = nn.NLLLoss()

# One mini-batch, with each image flattened into a single row.
images, labels = next(iter(trainloader))
images = images.view(len(images), -1)

# Forward pass and loss; no backward pass is run in this cell.
logps = model(images)
loss = criterion(logps, labels)

In [16]:
# Gradient of the first layer's weights before backpropagation
# (None in the recorded output, since backward() has not run for this model yet).
print('before backward pass: \n',model[0].weight.grad)

# Backpropagate the loss; autograd fills in .grad on the model's parameters.
loss.backward()

# The same attribute now holds a tensor of gradients.
print('after backward pass: \n', model[0].weight.grad)

before backward pass: 
 None
after backward pass: 
 tensor([[-1.5259e-03, -1.5259e-03, -1.5259e-03,  ..., -1.5259e-03,
         -1.5259e-03, -1.5259e-03],
        [-1.2131e-03, -1.2131e-03, -1.2131e-03,  ..., -1.2131e-03,
         -1.2131e-03, -1.2131e-03],
        [-6.3747e-04, -6.3747e-04, -6.3747e-04,  ..., -6.3747e-04,
         -6.3747e-04, -6.3747e-04],
        ...,
        [ 2.4971e-03,  2.4971e-03,  2.4971e-03,  ...,  2.4971e-03,
          2.4971e-03,  2.4971e-03],
        [ 7.1835e-05,  7.1835e-05,  7.1835e-05,  ...,  7.1835e-05,
          7.1835e-05,  7.1835e-05],
        [-1.1955e-03, -1.1955e-03, -1.1955e-03,  ..., -1.1955e-03,
         -1.1955e-03, -1.1955e-03]])


# Training the network!

In [22]:
# Stochastic gradient descent over every parameter of the model, with a
# learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [23]:
print('Initial weights - ', model[0].weight)

images, labels = next(iter(trainloader))
# Flatten each image into one row. The batch size is read from the tensor
# instead of being hard-coded: an in-place resize_(64, 784) is only valid when
# the batch holds exactly 64 images and would break on a smaller batch.
images = images.view(images.shape[0], -1)

# Reset gradients left over from earlier backward passes; PyTorch accumulates
# them into .grad otherwise.
optimizer.zero_grad()

# Call the module itself rather than .forward() so registered hooks are run.
output = model(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient - ', model[0].weight.grad)

Initial weights -  Parameter containing:
tensor([[ 0.0071, -0.0102, -0.0332,  ..., -0.0307,  0.0094,  0.0093],
        [-0.0280,  0.0121, -0.0272,  ..., -0.0090, -0.0073, -0.0117],
        [ 0.0221,  0.0115,  0.0224,  ...,  0.0069, -0.0307, -0.0194],
        ...,
        [-0.0082,  0.0154, -0.0095,  ...,  0.0245,  0.0268,  0.0316],
        [ 0.0014,  0.0014,  0.0211,  ...,  0.0216,  0.0219, -0.0143],
        [ 0.0146, -0.0357,  0.0290,  ..., -0.0042,  0.0004, -0.0251]],
       requires_grad=True)
Gradient -  tensor([[ 0.0016,  0.0016,  0.0016,  ...,  0.0016,  0.0016,  0.0016],
        [-0.0006, -0.0006, -0.0006,  ..., -0.0006, -0.0006, -0.0006],
        [ 0.0015,  0.0015,  0.0015,  ...,  0.0015,  0.0015,  0.0015],
        ...,
        [-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004],
        [ 0.0010,  0.0010,  0.0010,  ...,  0.0010,  0.0010,  0.0010],
        [-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004]])


In [24]:
# Apply one optimizer update to the model's parameters using the gradients
# currently stored in their .grad attributes.
optimizer.step()
print('Updated weights = ',model[0].weight)

Updated weights =  Parameter containing:
tensor([[ 0.0071, -0.0102, -0.0333,  ..., -0.0307,  0.0094,  0.0092],
        [-0.0279,  0.0121, -0.0272,  ..., -0.0090, -0.0073, -0.0117],
        [ 0.0221,  0.0114,  0.0223,  ...,  0.0069, -0.0307, -0.0194],
        ...,
        [-0.0082,  0.0154, -0.0095,  ...,  0.0245,  0.0268,  0.0317],
        [ 0.0014,  0.0013,  0.0211,  ...,  0.0216,  0.0219, -0.0143],
        [ 0.0146, -0.0357,  0.0290,  ..., -0.0042,  0.0004, -0.0251]],
       requires_grad=True)
