![NN that classifies digit images](https://pytorch.org/tutorials/_images/mnist.png)


In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Define the network

In [5]:
class Net(nn.Module):
    """Small convolutional network producing 10 raw scores per input image.

    Two 3x3 convolution stages (each followed by ReLU and 2x2 max pooling)
    feed three fully connected layers. ``fc1`` expects 16 feature maps of
    size 6x6, which is what a 1-channel 32x32 image yields after both stages.
    """

    def __init__(self):
        super().__init__()
        # Convolutions with 3x3 square kernels:
        # 1 input image channel -> 6 channels -> 16 channels.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)

        # Affine layers (y = Wx + b); 16 channels * 6 * 6 spatial positions
        # are flattened into the first one.
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return a 2-D tensor with 10 columns."""
        # Convolve, rectify, then downsample with a 2x2 max-pooling window.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Collapse everything except the batch dimension into one axis.
        x = x.view(-1, self.num_flat_features(x))
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        # The final layer returns raw scores (no activation applied).
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all
        dimensions of ``x`` except the first (batch) dimension."""
        per_sample = 1
        for extent in x.shape[1:]:
            per_sample *= extent
        return per_sample

In [6]:
# Instantiate the network with freshly initialised parameters.
net = Net()
# A bare expression on the last line makes the notebook display the module
# summary (the registered layers and their configuration).
net

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [8]:
# Materialise the learnable parameters (weights and biases) returned by
# net.parameters() into a list so they can be counted and indexed below.
params = list(net.parameters())
# NOTE(review): displaying the whole list prints every parameter value, which
# produces a very long output; showing only the shapes would be more readable.
params

[Parameter containing:
 tensor([[[[-0.0778,  0.1738, -0.1890],
           [ 0.3303, -0.2505,  0.1739],
           [ 0.3105,  0.3157,  0.2047]]],
 
 
         [[[ 0.0372,  0.1287, -0.1029],
           [-0.0147, -0.2301, -0.1727],
           [-0.2450,  0.2055, -0.2693]]],
 
 
         [[[-0.1871,  0.1386,  0.0817],
           [-0.2116, -0.0950, -0.2100],
           [ 0.2925,  0.1154, -0.3127]]],
 
 
         [[[-0.1477, -0.0971,  0.2807],
           [-0.2617, -0.0184,  0.1013],
           [ 0.3322, -0.1448,  0.2452]]],
 
 
         [[[ 0.2343, -0.0564, -0.0395],
           [ 0.2143, -0.1439,  0.0593],
           [ 0.1912, -0.1184,  0.2231]]],
 
 
         [[[ 0.2257, -0.2099, -0.3002],
           [ 0.1058,  0.2332,  0.3062],
           [-0.0963,  0.1729,  0.0910]]]], requires_grad=True),
 Parameter containing:
 tensor([-0.3158,  0.0364, -0.1277, -0.1084, -0.3075,  0.1283],
        requires_grad=True),
 Parameter containing:
 tensor([[[[ 2.2868e-02, -6.8591e-02, -9.5567e-02],
           [

In [11]:
# Number of parameter tensors: one weight and one bias for each of the
# five layers (conv1, conv2, fc1, fc2, fc3).
print(len(params))
print(params[0].size()) # conv1 weight: (out_channels, in_channels, kernel_h, kernel_w)

10
torch.Size([6, 1, 3, 3])


In [13]:
# Random batch containing a single 1-channel 32x32 "image".
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# later cells refer to it.
input = torch.randn(1,1,32,32)
# Calling the module runs Net.forward on the batch.
out = net(input)
print(out)
print(out.size())

tensor([[ 0.0285, -0.0539, -0.0728, -0.1190, -0.0658,  0.0282, -0.0775,  0.0735,
          0.0223, -0.0614]], grad_fn=<AddmmBackward>)
torch.Size([1, 10])


In [14]:
# Clear the gradient buffers of all parameters before backpropagating.
net.zero_grad()
# `out` is not a scalar, so a gradient tensor of the same shape (1, 10) is
# supplied to backward(); random values are used purely for demonstration.
out.backward(torch.randn(1, 10))

## Loss Function

In [44]:
# Fresh forward pass; this output is what the loss below compares to the target.
output = net(input)
print(output)
# Show the output shape. The original cell printed target.size() here, but
# `target` is only created in the following cell, so a fresh
# "Restart & Run All" failed with NameError. The target must match this shape.
print(output.size())

tensor([[ 0.0090, -0.0636, -0.0815, -0.1058, -0.0746,  0.0238, -0.0753,  0.0756,
          0.0281, -0.0474]], grad_fn=<AddmmBackward>)
torch.Size([1, 10])


In [45]:
# Build a dummy regression target: 10 random values, first as a flat vector...
target = torch.randn(10)
print(target)
print(target.shape)
# ...then with a leading batch axis so its shape matches the network output.
target = target.unsqueeze(0)
print(target)
print(target.shape)

tensor([-0.7220,  0.2970,  0.8438,  0.8943, -0.0916,  0.5041,  0.7552, -2.6238,
        -0.7949,  0.8165])
torch.Size([10])
tensor([[-0.7220,  0.2970,  0.8438,  0.8943, -0.0916,  0.5041,  0.7552, -2.6238,
         -0.7949,  0.8165]])
torch.Size([1, 10])


In [46]:
# Mean-squared-error criterion applied to the network output and the target.
criterion = nn.MSELoss()
# `loss` carries a grad_fn (see the printed tensor), so calling backward() on
# it can propagate gradients back to the network parameters.
loss = criterion(output, target)
print(loss)

tensor(1.2151, grad_fn=<MseLossBackward>)


In [47]:
# Walk backwards through the autograd graph starting from the loss.
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear (fc3, recorded as an addmm node)
# NOTE(review): this prints an AccumulateGrad node (a leaf parameter, presumably
# the fc3 bias), not the ReLU; the ReLU node is likely at next_functions[1][0]
# of the addmm node -- TODO confirm.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # AccumulateGrad, not ReLU

<MseLossBackward object at 0x7f44f577b860>
<AddmmBackward object at 0x7f44f577b5f8>
<AccumulateGrad object at 0x7f44f577b860>



## Backprop

In [48]:
# Reset gradients first so the values printed after backward() come only from
# this loss and are not accumulated on top of the earlier backward pass.
net.zero_grad() # zeroes the gradient buffers of all parameters
print("conv1.bias.grad before backward")
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])


In [49]:
# Backpropagate the loss; this fills the .grad buffer of every parameter that
# contributed to it.
loss.backward()
# NOTE(review): the label below says "bia" (typo for "bias"); it is left as-is
# because it is runtime text echoed in the recorded output.
print("conv1.bia.grad after backward")
print(net.conv1.bias.grad)

conv1.bia.grad after backward
tensor([ 0.0027, -0.0107,  0.0187,  0.0006,  0.0174,  0.0068])


## Update the weights

In [52]:
# Manual stochastic-gradient-descent step applied to every parameter:
#     weight = weight - learning_rate * gradient
learning_rate = 0.01
for f in net.parameters():
    print("f:", f.size())
    print(f.grad)
    # Update the parameter in place inside torch.no_grad() so autograd does not
    # record the step. This replaces the older `.data` idiom, which bypasses
    # autograd's in-place correctness checks.
    with torch.no_grad():
        f.sub_(f.grad * learning_rate)
    print("------>")
    # The update changes the weights, not the gradient, so this prints the
    # same gradient values as above.
    print(f.grad)
    print("========================================================")

f: torch.Size([6, 1, 3, 3])
tensor([[[[ 1.9914e-04,  6.1256e-03, -4.5609e-03],
          [-2.1199e-03, -4.5061e-04, -2.0042e-03],
          [-2.0345e-03,  9.4744e-03,  6.4768e-03]]],


        [[[ 5.5451e-03, -7.3614e-03, -5.1983e-03],
          [-1.3874e-02,  7.1967e-03,  7.8678e-03],
          [ 1.9095e-04, -1.0183e-02, -1.3258e-05]]],


        [[[-1.3118e-02, -6.8251e-03,  5.4174e-03],
          [-2.2756e-02, -3.1255e-03, -9.4371e-03],
          [ 1.7207e-02,  8.5978e-04, -3.0406e-03]]],


        [[[ 1.3976e-02,  7.4048e-03,  8.9657e-03],
          [-2.6607e-03, -5.9383e-03,  1.2024e-02],
          [-3.2653e-03,  6.7447e-03, -4.3272e-03]]],


        [[[-4.0756e-03,  6.9799e-04,  1.2703e-02],
          [ 8.9268e-03, -9.2539e-03, -1.5147e-03],
          [ 1.3185e-02, -1.4954e-02,  1.9136e-02]]],


        [[[ 3.6144e-03,  1.2869e-02,  1.7646e-02],
          [-1.1047e-03,  9.2763e-03,  1.2870e-02],
          [-5.5574e-03, -1.1743e-03,  9.1300e-03]]]])
------>
tensor([[[[ 1.9914e-04,

In [56]:
# Create the optimizer: plain SGD over all network parameters, using the same
# step size (0.01) as the manual update in the previous cell.
optimizer = optim.SGD(net.parameters(), lr=0.01) 
# The statements below are what one iteration of a training loop looks like.
optimizer.zero_grad() # zero the gradient buffers so gradients from earlier backward passes are not accumulated

# Forward pass, loss computation, and backpropagation.
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # applies the update to every parameter using its .grad