In [169]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [170]:
# nn.Module 
#  - A convenient way of encapsulating parameters, 
#  - With helpers for moving them to the GPU, exporting, loading etc.
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    Takes single-channel images and produces 10 output values per sample.
    The first linear layer expects 16 * 6 * 6 flattened features, which is
    what a 32x32 input yields after two (3x3 conv, 2x2 max-pool) stages.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages. Constructing a layer also allocates its
        # weight and bias parameters and fills them with random initial values.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

        # Fully connected head: 576 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the forward pass and return the raw outputs of ``fc3``."""
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), 2)
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), 2)

        # Collapse everything except the leading (batch) dimension.
        flat = pooled2.view(-1, self.num_flat_features(pooled2))

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        count = 1
        for dim in x.shape[1:]:
            count *= dim
        return count


# Instantiating the network builds every layer and initializes its weights
# with random values; they differ from run to run unless the RNG is seeded
# beforehand (e.g. with torch.manual_seed).
net = Net()
print(net)
              


Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [171]:
# Show learnable parameters.
#  nn.Module registers every Parameter assigned as an attribute (including
#  those of its sub-modules); net.parameters() yields them in that order.
params = list(net.parameters())
print(len(params))

# Print the size of every parameter tensor. Each layer contributes its
# weight followed by its bias, in definition order:
#   conv1, conv2 (conv kernels + biases), then fc1, fc2, fc3.
# Looping instead of indexing params[0]..params[9] keeps this cell valid if
# the number of layers changes.
for param in params:
    print(param.size())


10
torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])
torch.Size([16])
torch.Size([120, 576])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [172]:
# Show the full specification of the first convolution layer.
#   Its attached parameters have already been initialized to random values.
#   The _parameters field contains both the weight and the bias.
net.conv1.__dict__

{'training': True,
 '_parameters': OrderedDict([('weight', Parameter containing:
               tensor([[[[ 0.2526,  0.3135, -0.0784],
                         [ 0.0204,  0.1244, -0.0102],
                         [ 0.2787,  0.2477,  0.3020]]],
               
               
                       [[[ 0.2295,  0.0069, -0.0217],
                         [-0.2912, -0.2154,  0.2183],
                         [-0.2882,  0.2601,  0.3117]]],
               
               
                       [[[ 0.1047, -0.1835, -0.2343],
                         [-0.3302, -0.1199,  0.1460],
                         [-0.2766, -0.2508,  0.0382]]],
               
               
                       [[[ 0.2453,  0.0481,  0.1039],
                         [ 0.2205, -0.2978,  0.3254],
                         [ 0.2055, -0.1040, -0.2950]]],
               
               
                       [[[ 0.3266, -0.1190,  0.3330],
                         [ 0.3076, -0.2553,  0.2697],
                         [ 

In [173]:
# Apply the network to a random input.
#  Shape is (batch=1, channels=1, 32, 32): conv1 takes one input channel and
#  a 32x32 image is what produces the 16 * 6 * 6 features fc1 expects.
#  NOTE(review): `input` shadows the Python builtin of the same name; it is
#  kept because later cells reference it.
input = torch.randn(1,1,32,32)
out   = net(input)
print(out)

tensor([[-0.0525,  0.0240,  0.1360,  0.0942,  0.0413, -0.0365, -0.0871, -0.1458,
         -0.0712, -0.1362]], grad_fn=<AddmmBackward>)


In [174]:
# Zero the gradient buffers of all parameters, then backprop a random gradient.
#  `out` is not a scalar (it has shape (1, 10)), so backward() is given an
#  explicit gradient tensor of the same shape.
net.zero_grad()
out.backward(torch.randn(1,10))

In [175]:
# The forward output is unchanged by the backward pass; only the parameters'
# gradient buffers were written.
print(out)

tensor([[-0.0525,  0.0240,  0.1360,  0.0942,  0.0413, -0.0365, -0.0871, -0.1458,
         -0.0712, -0.1362]], grad_fn=<AddmmBackward>)


In [176]:
# Loss functions: run a fresh forward pass and build a dummy target for it.
output = net(input)
# Draw 10 random values and reshape them to (1, 10), i.e. a mini-batch
# holding a single element, to match the shape of `output`.
target = torch.randn(10).view(1, -1)
print(target)

tensor([[ 0.4980,  0.4245,  1.5474,  1.3388,  0.2780, -0.1623,  1.1596, -0.1084,
          0.0507, -0.5938]])


In [177]:
# Mean-squared-error loss between the network output and the dummy target.
# The result is a scalar tensor that still carries a grad_fn, so it can be
# backpropagated.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(0.5856, grad_fn=<MseLossBackward>)


In [178]:
# The backward graph can be inspected by following grad_fn.next_functions.
#  Walking back from the loss: the MSE node, then the node of the last
#  linear layer (fc3), then the gradient accumulator of a leaf parameter.
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x7fa79835c8d0>
<AddmmBackward object at 0x7fa7aa0d12d0>
<AccumulateGrad object at 0x7fa79835c8d0>


In [179]:
# conv1 parameters before any update step, for comparison with later cells.
net.conv1._parameters

OrderedDict([('weight',
              Parameter containing:
              tensor([[[[ 0.2526,  0.3135, -0.0784],
                        [ 0.0204,  0.1244, -0.0102],
                        [ 0.2787,  0.2477,  0.3020]]],
              
              
                      [[[ 0.2295,  0.0069, -0.0217],
                        [-0.2912, -0.2154,  0.2183],
                        [-0.2882,  0.2601,  0.3117]]],
              
              
                      [[[ 0.1047, -0.1835, -0.2343],
                        [-0.3302, -0.1199,  0.1460],
                        [-0.2766, -0.2508,  0.0382]]],
              
              
                      [[[ 0.2453,  0.0481,  0.1039],
                        [ 0.2205, -0.2978,  0.3254],
                        [ 0.2055, -0.1040, -0.2950]]],
              
              
                      [[[ 0.3266, -0.1190,  0.3330],
                        [ 0.3076, -0.2553,  0.2697],
                        [ 0.0849,  0.1274, -0.2441]]],
              


In [180]:
# Show the conv1 bias gradient being populated by backpropagation.
#  backward() only fills the .grad buffers; the bias values themselves do
#  not change until an update step is applied.
net.zero_grad()

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0072, -0.0175, -0.0079, -0.0122, -0.0022,  0.0024])


In [181]:
# Update all the weights with one manual step of plain SGD:
#   param <- param - learning_rate * param.grad
learning_rate = 0.01
# The update itself must not be tracked by autograd, hence no_grad(). This
# replaces mutating `.data`, which silently bypasses autograd's safety checks.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:
            # No gradient has been accumulated for this parameter; skip it.
            continue
        f.sub_(f.grad * learning_rate)

In [182]:
# conv1 parameters after the manual update step; the values have shifted
# slightly relative to the earlier listing.
net.conv1._parameters

OrderedDict([('weight',
              Parameter containing:
              tensor([[[[ 0.2526,  0.3136, -0.0784],
                        [ 0.0204,  0.1244, -0.0100],
                        [ 0.2787,  0.2478,  0.3021]]],
              
              
                      [[[ 0.2295,  0.0070, -0.0218],
                        [-0.2912, -0.2155,  0.2183],
                        [-0.2882,  0.2603,  0.3118]]],
              
              
                      [[[ 0.1049, -0.1836, -0.2342],
                        [-0.3304, -0.1198,  0.1461],
                        [-0.2766, -0.2510,  0.0381]]],
              
              
                      [[[ 0.2453,  0.0480,  0.1039],
                        [ 0.2205, -0.2980,  0.3255],
                        [ 0.2056, -0.1039, -0.2950]]],
              
              
                      [[[ 0.3267, -0.1191,  0.3330],
                        [ 0.3076, -0.2554,  0.2696],
                        [ 0.0849,  0.1273, -0.2441]]],
              


In [201]:
# Use PyTorch optimisers instead of the hand-written update.
# NOTE(review): this import sits mid-notebook; conventionally it belongs with
# the other imports in the first cell.
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Reset the gradient buffers so gradients left over from earlier backward()
# calls are not accumulated into this step.
optimizer.zero_grad()

# Run the forward pass.
output = net(input)

# Compute the loss in the forward pass.
loss   = criterion(output, target)

# Backprop loss.
loss.backward()

# Update the parameters based on the gradients currently in their gradient buffers.
optimizer.step()

In [202]:
# conv1 parameters after optimizer.step().
net.conv1._parameters

OrderedDict([('weight',
              Parameter containing:
              tensor([[[[ 0.2527,  0.3155, -0.0794],
                        [ 0.0202,  0.1244, -0.0092],
                        [ 0.2801,  0.2492,  0.3038]]],
              
              
                      [[[ 0.2302,  0.0074, -0.0228],
                        [-0.2923, -0.2166,  0.2187],
                        [-0.2885,  0.2620,  0.3129]]],
              
              
                      [[[ 0.1069, -0.1845, -0.2342],
                        [-0.3323, -0.1198,  0.1472],
                        [-0.2772, -0.2526,  0.0377]]],
              
              
                      [[[ 0.2462,  0.0478,  0.1040],
                        [ 0.2213, -0.2989,  0.3265],
                        [ 0.2062, -0.1030, -0.2953]]],
              
              
                      [[[ 0.3289, -0.1199,  0.3338],
                        [ 0.3090, -0.2569,  0.2702],
                        [ 0.0841,  0.1271, -0.2457]]],
              
