In [1]:
import torch

<h2> Understanding auto-grad and backward </h2>

In [2]:
# Leaf tensor of ones; requires_grad=True asks autograd to track operations on it.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [3]:
# y is computed from x, so it carries a grad_fn (the add that produced it).
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [4]:
# grad_fn is the backward node recorded for the operation that created y.
print(y.grad_fn)

<AddBackward0 object at 0x000001BBB63B42E0>


In [6]:
# Plain tensor of ones; requires_grad is left at its default, so autograd ignores it.
z = torch.ones((3, 3))
print(z)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [7]:
print(x.requires_grad)  # True: x was created with requires_grad=True
print(y.requires_grad)  # True: y was computed from x
print(z.requires_grad)  # False: z was created without requires_grad

True
True
False


In [8]:
# z does not require grad, so the result has no grad_fn and is not tracked either.
z1 = z + 2
print(z1)
print(z1.requires_grad)

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
False


In [9]:
# out = mean(2 * y^2) with y = x + 2, so d(out)/dx = 4*y/4 = y, which is 3 at x = 1.
a = y*y*2
out = a.mean()  # reduce to a scalar before calling backward()
print(a)
print(out)
out.backward()  # backpropagate from the scalar; the result lands in x.grad

tensor([[18., 18.],
        [18., 18.]], grad_fn=<MulBackward0>)
tensor(18., grad_fn=<MeanBackward0>)


In [13]:
# Gradient of out with respect to the leaf tensor x (every entry is 3).
print(x.grad)

tensor([[3., 3.],
        [3., 3.]])


In [14]:
# y is a non-leaf tensor (it has a grad_fn), so its .grad is not kept and this
# prints None. Call y.retain_grad() before backward() to keep it.
print(y.grad)

None


  print(y.grad)


In [15]:
# NOTE(review): IPython line magic; looks like leftover housekeeping. It
# presumably only clears the display and does not reset any variables - confirm.
%clear




In [26]:
# Start again from a fresh leaf tensor; x, y, z and out are all rebound here
# (z was previously the 3x3 tensor that did not require grad).
x = torch.ones(2,2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()  # reduce to a scalar before calling backward()
print(f'x = {x}')
print(f'y = {y}')
print(f'z = {z}')
print(f'out = {out}')


x = tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
y = tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
z = tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
out = 27.0


In [27]:
out.backward()
print(x.grad)  # d(out)/dx = 6*y/4 = 4.5 for out = mean(3 * y^2) with y = 3
print(y.grad)  # None: y is a non-leaf tensor and retain_grad() was not called

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
None


  print(y.grad)


In [28]:
# x.grad still holds 4.5 from the previous out.backward(); each backward() call
# adds to .grad instead of overwriting it.
a1 = (x*x).mean()
a2 = (x*x*x).mean()
a1.backward()  # adds d(a1)/dx = 2x/4 = 0.5  -> 5.0

print(x.grad)
a2.backward()  # adds d(a2)/dx = 3x^2/4 = 0.75 -> 5.75
print(x.grad)


tensor([[5., 5.],
        [5., 5.]])
tensor([[5.7500, 5.7500],
        [5.7500, 5.7500]])


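NOTE: `.grad` is an attribute, not a method. After `out.backward()`, `x.grad` holds the partial derivative d(out)/d(x) evaluated at the current value of `x` (here every entry of x is 1), i.e. the change in `out` per unit change in each element of `x`.
Gradients accumulate across `backward()` calls: the 5.0 and 5.75 printed above are the earlier 4.5 plus 0.5 (from `a1`), and then plus 0.75 (from `a2`).
Note that these are partial derivatives only, and they are stored only for leaf tensors such as `x`; a non-leaf tensor such as `y` has `.grad` equal to None unless `y.retain_grad()` is called before `backward()`.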

<h2>Training a NN </h2>

<h3>Build the model </h3>

In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    For a single-channel 32x32 input the spatial size goes
    32 -> 30 (conv1) -> 15 (pool) -> 13 (conv2) -> 6 (pool), which is why
    ``fc1`` expects 16 * 6 * 6 input features. The network outputs 10 values
    per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 6 -> 16 channels, 3x3 kernels.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Classifier head: 576 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the raw (un-normalised) outputs."""
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)
        # Collapse every non-batch dimension into one feature axis.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        return x.size()[1:].numel()
    

# Instantiate the network; printing an nn.Module lists its registered layers.
model = NN()
print(model)
        

NN(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


<h3> Weights of the model </h3>

In [37]:
params = list(model.parameters())
print(len(params))  # 10: a weight and a bias tensor for each of the 5 layers
print(params[0]) # Gives the weights of conv1
print(params[0].size())  # (out_channels=6, in_channels=1, 3, 3)

10
Parameter containing:
tensor([[[[-0.1515,  0.2096,  0.1614],
          [-0.2639, -0.2187,  0.1909],
          [ 0.0599, -0.1754,  0.0781]]],


        [[[ 0.0513, -0.2952,  0.2416],
          [-0.0534, -0.2988,  0.0137],
          [-0.2120, -0.1890, -0.1163]]],


        [[[ 0.1324, -0.1637,  0.0086],
          [ 0.0960,  0.0004,  0.1656],
          [ 0.3150, -0.1174, -0.1840]]],


        [[[-0.0457,  0.3091,  0.2412],
          [ 0.0021,  0.2760, -0.2969],
          [ 0.1484, -0.1667,  0.0265]]],


        [[[-0.1142,  0.2913,  0.0114],
          [-0.0758,  0.2116,  0.2103],
          [-0.2824, -0.0644, -0.1789]]],


        [[[-0.1172, -0.0800, -0.1277],
          [-0.1049,  0.1313, -0.0928],
          [-0.2746,  0.3198, -0.0525]]]], requires_grad=True)
torch.Size([6, 1, 3, 3])


<h3> Evaluate Result </h3>

In [42]:
input_val = torch.randn(1,1,32,32)  # A rand 32x32 input
output_val = model(input_val)  # calling the module runs forward()
print(output_val)
print(output_val.size())  # one row of 10 outputs for the single input

tensor([[-0.0783, -0.1200,  0.0401, -0.0513, -0.0119, -0.0339,  0.0752,  0.0820,
         -0.0909,  0.0044]], grad_fn=<AddmmBackward>)
torch.Size([1, 10])


<h3> Compute Loss </h3>

In [47]:
# No loss is computed in this cell: output_val is not a scalar, so backward()
# is given an explicit gradient tensor (random, same shape as the output).
model.zero_grad()
output_val.backward(torch.randn(1,10))

In [48]:
# Inputs and target are random and no seed is set in the visible cells, so the
# printed loss differs from run to run.
input_val = torch.randn(1,1,32,32)
output_val = model(input_val)  # fresh forward pass for the new input

target = torch.randn(1,10)  # dummy target with the same shape as the model output
loss_function = nn.MSELoss()
loss = loss_function(output_val, target)  # scalar mean-squared error
print(loss)

tensor(1.0067, grad_fn=<MseLossBackward>)


In [52]:
# Walk backwards through the autograd graph, always following the first input.
print(loss.grad_fn) # MSELoss
print(loss. grad_fn.next_functions[0][0]) # Linear (fc3, recorded as an addmm)
print(loss. grad_fn.next_functions[0][0].next_functions[0][0]) # first input of the addmm: an AccumulateGrad leaf node (presumably fc3's bias), not the ReLU
# ...and so on

<MseLossBackward object at 0x000001BBBAAA04F0>
<AddmmBackward object at 0x000001BBB9EA5A60>
<AccumulateGrad object at 0x000001BBB9C09970>


<h3> Backprop and compute gradients </h3>

In [53]:
model.zero_grad()  # clear gradients left over from the earlier backward() call
print(f'Conv1 grad before backprop: {model.conv1.bias.grad}')
loss.backward()  # backpropagate the loss; populates .grad on the model parameters
print(f'Conv1 grad after backprop: {model.conv1.bias.grad}')

Conv1 grad before backprop: tensor([0., 0., 0., 0., 0., 0.])
Conv1 grad after backprop: tensor([-0.0051,  0.0011,  0.0083,  0.0064, -0.0015,  0.0078])


<h3> Updating weights </h3>

The simplest update rule used in practice is the Stochastic Gradient Descent (SGD):
```
weight = weight - learning_rate * gradient
```
We can implement this using simple Python code:
```            
learning_rate = 0.01
for f in model.parameters():
    f.data.sub_(f.grad.data * learning_rate)
```
However, since there are several standard update rules, we use the `torch.optim` package from PyTorch instead.

In [54]:
import torch.optim as optim
learning_rate = 0.01  # step size used by the optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate)  # plain SGD over all model parameters

In [56]:
# Training loop
optimizer.zero_grad()   
output_val = model(input_val)
loss = loss_function(output_val, target)
loss.backward()
optimizer.step()  # Updates the weights  