In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [44]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__() # super
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        # (input, output, convolution size)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120) # why 16*5*5
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [48]:
net = Net()
print(net)

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [13]:
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's .weight
# [6, '1' why?, 5, 5]

10
torch.Size([6, 1, 5, 5])


In [46]:
input = Variable(torch.randn(1, 1, 32, 32))
print(input.size())
out = net(input)
print(out)

torch.Size([1, 1, 32, 32])
Variable containing:
 0.0415  0.1586  0.1219  0.0461  0.0809  0.1691  0.0779  0.0286 -0.1085  0.0437
[torch.FloatTensor of size 1x10]



In [64]:
net.zero_grad()
out.backward(torch.randn(1,10))

RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.

In [78]:
#? Trying to backward through the graph second time, 
# but the buffers have already been freed. Please 
# specify retain_variables=True when calling backward 
# for the first time.

In [73]:
output = net(input)
target = Variable(torch.arange(1, 11)) # a dummy target, for example
print(target)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

Variable containing:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
[torch.FloatTensor of size 10]

Variable containing:
 38.6848
[torch.FloatTensor of size 1]



In [75]:
net.zero_grad()

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
Variable containing:
 0
 0
 0
 0
 0
 0
[torch.FloatTensor of size 6]



RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.

In [24]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [77]:
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your trainig loop:

for i in range(10):
    optimizer.zero_grad() # zero the gradients buffers
    output = net(input) # forward
    loss = criterion(output, target) # desgin loss
    loss.backward() # backward
    optimizer.step() # Does the update
    print(output)

Variable containing:
-0.1223 -0.0825 -0.0774  0.0968  0.0227 -0.0385  0.0362  0.1089  0.0755  0.0699
[torch.FloatTensor of size 1x10]

Variable containing:
-0.1335 -0.0782 -0.0277  0.0896  0.0893  0.0073  0.1432  0.1556  0.1664  0.1897
[torch.FloatTensor of size 1x10]

Variable containing:
-0.1713 -0.0654  0.0493  0.0889  0.2113  0.0972  0.2948  0.2505  0.3231  0.3621
[torch.FloatTensor of size 1x10]

Variable containing:
-0.2312 -0.0009  0.2163  0.0931  0.4677  0.2979  0.6108  0.4672  0.6806  0.7237
[torch.FloatTensor of size 1x10]

Variable containing:
-0.3740  0.1947  0.7092  0.1691  1.2099  0.8871  1.5603  1.1738  1.7448  1.8373
[torch.FloatTensor of size 1x10]

Variable containing:
-0.6770  1.1679  2.7371  1.0543  4.3069  3.6719  5.5569  4.6210  6.4568  6.8169
[torch.FloatTensor of size 1x10]

Variable containing:

Columns 0 to 7 
  1.2164   3.9672   6.6748   6.8254  10.6379  11.8962  14.6565  15.6060

Columns 8 to 9 
 18.3675  20.0413
[torch.FloatTensor of size 1x10]

Variable co