## Neural networks: LeNet-5

In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """LeNet-5 style convolutional network.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers ending in 10 output values. ``fc1`` takes
    16 * 5 * 5 = 400 features per sample, which is what the convolution and
    pooling stack produces for a single-channel 32x32 input.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Feature extractor: 1 input channel -> 6 maps -> 16 maps.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run ``x`` through both conv/pool stages and the three linear layers.

        Returns the raw output of ``fc3`` (no activation is applied to it).
        """
        # Stage 1: convolution, ReLU, 2x2 max pooling.
        activated1 = F.relu(self.conv1(x))
        pooled1 = F.max_pool2d(activated1, (2, 2))
        # Stage 2: same pattern; a bare int is used for the square pooling window.
        activated2 = F.relu(self.conv2(pooled1))
        pooled2 = F.max_pool2d(activated2, 2)
        # Collapse every non-batch dimension into one feature axis.
        flat = pooled2.view(-1, self.num_flat_features(pooled2))
        hidden1 = F.relu(self.fc1(flat))
        hidden2 = F.relu(self.fc2(hidden1))
        return self.fc3(hidden2)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        count = 1
        for extent in x.size()[1:]:
            count = count * extent
        return count
    
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [2]:
# Learnable parameters
# Materialise net.parameters() as a list so entries can be indexed; later
# cells print params[0] again to show how the weights change.
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's weights
print(params[0])

10
torch.Size([6, 1, 5, 5])
Parameter containing:
(0 ,0 ,.,.) = 
  0.0695 -0.0359  0.1706  0.1521  0.1067
 -0.1449 -0.0303 -0.1608 -0.1046 -0.0512
 -0.0005  0.1205  0.0144 -0.0752 -0.0652
 -0.0407  0.1882 -0.0305 -0.1201  0.0595
  0.1992  0.1409 -0.0686 -0.0687 -0.0319

(1 ,0 ,.,.) = 
  0.1687 -0.1868  0.1006  0.0462  0.1158
  0.1400 -0.1306 -0.0486  0.1926 -0.0289
 -0.0600 -0.1457 -0.0426  0.1493 -0.0056
 -0.0437  0.0336  0.0665  0.0717  0.0829
 -0.0092 -0.1656 -0.1323 -0.0101 -0.0160

(2 ,0 ,.,.) = 
 -0.1295 -0.0372 -0.1214 -0.0735 -0.0103
 -0.1637  0.0319 -0.0089  0.0573 -0.1540
  0.0758 -0.0282 -0.1910  0.0631 -0.0241
 -0.0798 -0.0871  0.0041 -0.0778  0.1138
  0.0981  0.0675 -0.1046  0.1774 -0.0779

(3 ,0 ,.,.) = 
 -0.1738 -0.0881 -0.1004 -0.1933  0.1630
  0.0095  0.0109 -0.0141  0.1925 -0.0735
  0.0104  0.0379  0.0422 -0.0575  0.0469
 -0.1396  0.1240 -0.0251  0.1017 -0.0266
  0.1868 -0.1680  0.0190 -0.1492 -0.1325

(4 ,0 ,.,.) = 
 -0.1145  0.1612 -0.1598 -0.1838  0.0746
  0.1567  

In [3]:
# Feed one random tensor of size 1x1x32x32 through the network and print
# the result.
# NOTE(review): `input` shadows the Python builtin of the same name; it is
# kept because later cells reuse this variable.
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)

Variable containing:
-0.0835  0.0557  0.1064  0.0963 -0.0038  0.0173 -0.0359 -0.0972 -0.0931  0.0437
[torch.FloatTensor of size 1x10]



In [4]:
# Reset the parameter gradients, then backpropagate from `out` using a random
# 1x10 tensor as the upstream gradient.
net.zero_grad()
out.backward(torch.randn(1, 10))

In [5]:
# Forward pass on the same input as before.
output = net(input)
# Dummy regression target holding the values 1..10.
# torch.arange excludes its end point, so arange(1., 11.) yields the same ten
# values as the deprecated, end-inclusive torch.range(1, 10); float bounds
# keep the result a float tensor. The view gives the target the same 1x10
# shape as the network output instead of relying on mismatched shapes.
target = Variable(torch.arange(1., 11.).view(1, -1))
criterion = nn.MSELoss()

# Mean squared error between the network output and the dummy target.
loss = criterion(output, target)
print(loss)

Variable containing:
 38.6278
[torch.FloatTensor of size 1]



  from ipykernel import kernelapp as app


In [6]:
# Walk backwards through the graph that produced `loss`: print the function
# that created it, then follow the first input of each function three more
# times, printing the function found at every step.
node = loss.creator
print(node)
for hop in range(3):
    node = node.previous_functions[0][0]
    print(node)

<torch.nn._functions.thnn.auto.MSELoss object at 0x10b1c13c8>
<torch.nn._functions.linear.Linear object at 0x10b1c1208>
<torch.nn._functions.thnn.auto.Threshold object at 0x10b1c1128>
<torch.nn._functions.linear.Linear object at 0x10b1c1048>


In [7]:
# Reset all parameter gradients and show conv1's bias gradient before the
# backward pass.
net.zero_grad()
print(net.conv1.bias.grad)

# Backpropagate the loss and show the same gradient again.
loss.backward()
print(net.conv1.bias.grad)

Variable containing:
 0
 0
 0
 0
 0
 0
[torch.FloatTensor of size 6]

Variable containing:
1.00000e-02 *
  5.0322
 -5.9714
 -3.0797
 -5.1227
 -4.2310
 -4.0425
[torch.FloatTensor of size 6]



In [9]:
# Manual gradient-descent step: subtract each parameter's gradient, scaled by
# the learning rate, from the parameter's data in place.
learning_rate = 0.01
for param in net.parameters():
    step = param.grad.data * learning_rate
    param.data.sub_(step)
# Show conv1's weights after the update.
print(params[0])

Parameter containing:
(0 ,0 ,.,.) = 
  0.0699 -0.0357  0.1703  0.1527  0.1066
 -0.1434 -0.0298 -0.1595 -0.1044 -0.0502
  0.0001  0.1194  0.0125 -0.0766 -0.0655
 -0.0402  0.1890 -0.0313 -0.1193  0.0598
  0.1987  0.1410 -0.0684 -0.0696 -0.0324

(1 ,0 ,.,.) = 
  0.1685 -0.1878  0.1022  0.0451  0.1173
  0.1412 -0.1312 -0.0499  0.1940 -0.0282
 -0.0573 -0.1453 -0.0424  0.1509 -0.0043
 -0.0440  0.0347  0.0645  0.0687  0.0832
 -0.0098 -0.1676 -0.1314 -0.0106 -0.0173

(2 ,0 ,.,.) = 
 -0.1284 -0.0380 -0.1207 -0.0753 -0.0107
 -0.1639  0.0318 -0.0090  0.0576 -0.1556
  0.0757 -0.0277 -0.1912  0.0652 -0.0247
 -0.0791 -0.0890  0.0030 -0.0783  0.1143
  0.0965  0.0665 -0.1058  0.1776 -0.0790

(3 ,0 ,.,.) = 
 -0.1751 -0.0869 -0.1030 -0.1951  0.1636
  0.0091  0.0090 -0.0145  0.1923 -0.0734
  0.0104  0.0394  0.0408 -0.0575  0.0494
 -0.1417  0.1228 -0.0233  0.1037 -0.0270
  0.1868 -0.1685  0.0195 -0.1487 -0.1297

(4 ,0 ,.,.) = 
 -0.1150  0.1607 -0.1583 -0.1838  0.0767
  0.1557  0.1279 -0.1863 -0.0719  0.12

In [11]:
import torch.optim as optim

# SGD optimizer over all of the network's parameters, learning rate 0.01.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One training step: reset gradients, forward pass, compute the loss,
# backpropagate, then let the optimizer update the parameters.
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()

In [12]:
# Show conv1's weights again, now after the optimizer step.
print(params[0])

Parameter containing:
(0 ,0 ,.,.) = 
  0.0724 -0.0358  0.1721  0.1534  0.1088
 -0.1429 -0.0299 -0.1602 -0.1052 -0.0501
  0.0011  0.1185  0.0104 -0.0767 -0.0663
 -0.0391  0.1907 -0.0316 -0.1176  0.0607
  0.1989  0.1428 -0.0694 -0.0706 -0.0340

(1 ,0 ,.,.) = 
  0.1704 -0.1894  0.1064  0.0453  0.1184
  0.1442 -0.1343 -0.0528  0.1958 -0.0305
 -0.0574 -0.1452 -0.0438  0.1517 -0.0029
 -0.0451  0.0356  0.0638  0.0672  0.0835
 -0.0102 -0.1700 -0.1311 -0.0103 -0.0188

(2 ,0 ,.,.) = 
 -0.1283 -0.0373 -0.1205 -0.0762 -0.0108
 -0.1652  0.0303 -0.0079  0.0579 -0.1568
  0.0767 -0.0277 -0.1936  0.0680 -0.0256
 -0.0798 -0.0913  0.0029 -0.0795  0.1151
  0.0969  0.0668 -0.1070  0.1781 -0.0799

(3 ,0 ,.,.) = 
 -0.1786 -0.0874 -0.1056 -0.2003  0.1657
  0.0092  0.0101 -0.0149  0.1967 -0.0726
  0.0124  0.0429  0.0409 -0.0584  0.0512
 -0.1467  0.1246 -0.0213  0.1078 -0.0253
  0.1903 -0.1707  0.0203 -0.1506 -0.1308

(4 ,0 ,.,.) = 
 -0.1171  0.1632 -0.1594 -0.1865  0.0785
  0.1585  0.1288 -0.1896 -0.0728  0.12