In [1]:
# torch.nn:package used to construct neural networks
# torch.nn.Module:class of a standard neural network
# torch.nn.Module.forward:function receiving input and returning output 

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [12]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1=nn.Conv2d(1,6,5)
        self.conv2=nn.Conv2d(6,16,5)
        self.fc1=nn.Linear(16*5*5,120)
        self.fc2=nn.Linear(120,84)
        self.fc3=nn.Linear(84,10)
    
    def forward(self,x):
        x=F.max_pool2d(F.relu(self.conv1(x)),(2,2))
        x=F.max_pool2d(F.relu(self.conv2(x)),(2,2))
        x=x.view(-1,self.num_flat_features(x))
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        x=self.fc3(x)
        return x
    
    def num_flat_features(self,x):
        size=x.size()[1:]
        num_features=1
        for s in size:
            num_features*=s
        return num_features
net=Net()
print(net)

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [24]:
params=list(net.parameters())
print(len(params))
for i in range(len(params)):
    print(i,params[i].size())

10
0 torch.Size([6, 1, 5, 5])
1 torch.Size([6])
2 torch.Size([16, 6, 5, 5])
3 torch.Size([16])
4 torch.Size([120, 400])
5 torch.Size([120])
6 torch.Size([84, 120])
7 torch.Size([84])
8 torch.Size([10, 84])
9 torch.Size([10])


In [60]:
input=Variable(torch.randn(1,1,32,32))
out=net(input)
print(out)

Variable containing:
 0.0307 -0.1047  0.1669 -0.0065 -0.0685 -0.1237 -0.0779  0.0646  0.0088 -0.0509
[torch.FloatTensor of size 1x10]

Parameter containing:
 0.0236
-0.0901
 0.0757
 0.0030
-0.0206
-0.1038
-0.1015
-0.0445
 0.0342
-0.0825
[torch.FloatTensor of size 10]



In [61]:
#nSamples x nChannels x Height x Width
input=Variable(torch.randn(1,1,32,32))
out=net.forward(input)
print(out)

Variable containing:
 0.0324 -0.1073  0.1745  0.0062 -0.0734 -0.0894 -0.0694  0.0540  0.0169 -0.0713
[torch.FloatTensor of size 1x10]



In [62]:
net.zero_grad()
out.backward(torch.randn(1,10))

In [63]:
# torch.nn only supports mini-batches The entire torch.nn package only supports inputs that are a mini-batch 
# of samples, and not a single sample.
# If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.
input1=Variable(torch.randn(3,32,32))
input2=input.unsqueeze(0)
print(input1.size())
print(input2.size())

torch.Size([3, 32, 32])
torch.Size([1, 1, 1, 32, 32])


In [64]:
output=net(input)
target=Variable(torch.arange(1,11))
criterion=nn.MSELoss()
loss=criterion(output,target)
print(loss)

Variable containing:
 38.7372
[torch.FloatTensor of size 1]



In [67]:
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<torch.autograd.function.MSELossBackward object at 0x7f6326604138>
<torch.autograd.function.AddmmBackward object at 0x7f6326604048>
<AccumulateGrad object at 0x7f6326610c50>


In [68]:
net.zero_grad()
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
Variable containing:
 0
 0
 0
 0
 0
 0
[torch.FloatTensor of size 6]

conv1.bias.grad after backward
Variable containing:
 0.0277
 0.0643
-0.1082
 0.1034
 0.0223
-0.0978
[torch.FloatTensor of size 6]



In [69]:
# Using python code to update weights
# learning_rate = 0.01
# for f in net.parameters():
#     f.data.sub_(f.grad.data * learning_rate)

In [71]:
# Using pakage nn.optim to update weights
import torch.optim as optim
optimizer=optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad()
output=net(input)
loss=criterion(output,target)
loss.backward()
optimizer.step()

In [73]:
print(net.conv1.weight.grad)

Variable containing:
(0 ,0 ,.,.) = 
 -0.0770  0.0213  0.0422  0.0961  0.0237
  0.0302  0.1942 -0.0576  0.0028  0.1132
 -0.1761 -0.0682  0.0742 -0.0353  0.0586
 -0.0673 -0.0218  0.0274 -0.1235 -0.1659
 -0.1227  0.0029 -0.0429 -0.0480 -0.0906

(1 ,0 ,.,.) = 
 -0.0147 -0.0879 -0.1156 -0.1352  0.0526
  0.0646 -0.0279 -0.1558  0.0222 -0.0439
  0.0767  0.0798 -0.0347  0.0179 -0.0256
  0.0736  0.0692  0.1238  0.0433 -0.1749
  0.0727  0.0236 -0.0337 -0.1562 -0.0197

(2 ,0 ,.,.) = 
  0.1164  0.1400 -0.0511 -0.0527 -0.0768
 -0.0369 -0.0507  0.0314 -0.0397  0.0786
 -0.0958 -0.1261  0.0616  0.0436  0.0549
 -0.0294 -0.0026  0.0527  0.0218 -0.0928
 -0.0001  0.0062  0.0994  0.0072 -0.0469

(3 ,0 ,.,.) = 
  0.0297 -0.0655 -0.0469  0.0593  0.1417
  0.0591 -0.0704  0.0744 -0.1173 -0.0800
  0.0533  0.0006 -0.0336  0.0792  0.1015
 -0.0296  0.0588 -0.1722  0.1133  0.0514
  0.1057  0.0512  0.0412 -0.0484 -0.0181

(4 ,0 ,.,.) = 
 -0.0304 -0.0079  0.0326 -0.0380 -0.0949
  0.0658  0.0018  0.0317  0.0245 -0.141