In [1]:
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


Define the network
------------------

Let’s define this network: a 2-dimensional input, 1 hidden neuron, and 2 output neurons.
We will not use bias.



In [3]:
input_dim = 2  # size of the input vector fed to the first linear layer

In [4]:
# Two bias-free linear layers chained together:
# input_dim inputs -> 1 hidden unit -> 2 outputs.
hidden_layer = nn.Linear(input_dim, 1, bias=False)
output_layer = nn.Linear(1, 2, bias=False)
net = nn.Sequential(hidden_layer, output_layer)
print(net)

Sequential (
  (0): Linear (2 -> 1)
  (1): Linear (1 -> 2)
)


This network has 2 weights in the first layer (2 inputs feeding 1 hidden neuron) and 2 weights in the second layer (1 hidden neuron feeding 2 outputs), therefore we will need to train 4 parameters. Let us set the initial values to e.g. [3,3] and [4,5].


In [5]:
# Overwrite the randomly initialised weights with fixed values so every
# number below can be checked by hand.
net[0].weight = nn.Parameter(torch.Tensor([[3, 3]]))    # first layer, shape 1x2
net[1].weight = nn.Parameter(torch.Tensor([[4], [5]]))  # second layer, shape 2x1

Let us print out the weights of the network

In [6]:
# Walk over the network's parameters and show the raw tensor behind each one.
for param in net.parameters():
    print(param.data)


 3  3
[torch.FloatTensor of size 1x2]


 4
 5
[torch.FloatTensor of size 2x1]



Let the input of the network be e.g. [1,2]

In [7]:
# Fixed 1-D input [1, 2], chosen so the results can be verified by hand
# (a random alternative would be torch.randn(1, input_dim)).
# Note: this name shadows Python's built-in input() for the rest of the notebook.
input = Variable(torch.FloatTensor([1, 2]))
print(input)


Variable containing:
 1
 2
[torch.FloatTensor of size 2]



Let us calculate the output of the neural network with these inputs:

In [8]:
# Forward pass: feed the input through both linear layers of the network.
output = net(input)
print(output)

Variable containing:
 36
 45
[torch.FloatTensor of size 2]



Assume that the true output is e.g. [35,35] and we want to do a backprop step to update the weights

In [9]:
# Desired network output, used as the regression target for the loss.
target = Variable(torch.FloatTensor([35, 35]))

Let us use the MSE loss

In [10]:
# Mean-squared-error criterion: averages the squared differences over the elements.
criterion = nn.MSELoss()


Print out the loss. It should be ((36-35)^2+(45-35)^2)/2 = 50.5

In [11]:
# Evaluate the MSE criterion on the network output against the target.
loss = criterion(output, target)
print(loss)

Variable containing:
 50.5000
[torch.FloatTensor of size 1]



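Calculate the derivative of the loss with respect to the 4 parameters with backprop. We will keep the computation graph so we can play with it later if we need to.
The 4 gradients are [54,108] and [9,90]: the loss gradient at the outputs is [36-35, 45-35] = [1,10] and the hidden activation is 3*1+3*2 = 9, so the second-layer gradients are [1*9, 10*9] = [9,90]; the gradient at the hidden neuron is 4*1+5*10 = 54, so the first-layer gradients are 54*[1,2] = [54,108].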

In [12]:
net.zero_grad()  # clear gradients left over from any earlier backward pass
# The loss holds a single value, so backward() seeds the chain rule with 1 on
# its own. An explicit torch.Tensor([1]) seed is redundant, and it is rejected
# with a shape-mismatch error on PyTorch versions where the loss is a 0-dim
# tensor. retain_graph=True keeps the graph's buffers so that backward can be
# run on this same loss again later.
loss.backward(retain_graph=True)
# Show dLoss/dWeight for every parameter of the network.
for param in net.parameters():
    print(param.grad.data)



  54  108
[torch.FloatTensor of size 1x2]


  9
 90
[torch.FloatTensor of size 2x1]



In [13]:
# One hand-written gradient-descent step: w <- w - learning_rate * dL/dw,
# applied in place to the tensor behind every parameter.
learning_rate = 0.0001
for param in net.parameters():
    update = param.grad.data * learning_rate
    param.data.sub_(update)

In [14]:
# Print the weights again to see how far the manual step moved them.
for param in net.parameters():
    print(param.data)


 2.9946  2.9892
[torch.FloatTensor of size 1x2]


 3.9991
 4.9910
[torch.FloatTensor of size 2x1]



In [15]:
# Forward pass again, now with the weights changed by the manual update step.
output = net(input)
print(output)

Variable containing:
 35.8839
 44.7842
[torch.FloatTensor of size 2]



In [16]:
# Recompute the loss on the new output to see the effect of the update.
loss = criterion(output, target)
print(loss)

Variable containing:
 48.2564
[torch.FloatTensor of size 1]



We can also use the optim module to do the updates:

In [17]:
# Let torch.optim.SGD apply the gradient-descent update instead of doing it by
# hand. It reuses the step size from the manual update so the two approaches
# stay comparable (optim is imported with the other imports at the top).
optimizer = optim.SGD(net.parameters(), lr=learning_rate)

# Ten training steps on the single (input, target) pair.
for iteration in range(10):
    optimizer.zero_grad()              # zero the gradient buffers
    output = net(input)
    loss = criterion(output, target)
    loss.backward()                    # calculate the new gradients
    optimizer.step()                   # apply the update to the weights

    # Second forward pass, for reporting only: the loss after the update.
    # If the learning rate is not too high this should keep decreasing.
    output = net(input)
    loss = criterion(output, target)
    print('loss:',loss)

('loss:', Variable containing:
 46.1385
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 44.1385
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 42.2488
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 40.4627
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 38.7737
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 37.1759
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 35.6636
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 34.2318
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 32.8754
[torch.FloatTensor of size 1]
)
('loss:', Variable containing:
 31.5900
[torch.FloatTensor of size 1]
)
