In [62]:
%matplotlib inline

## Tensors and variables

In [68]:
import torch
from torch import autograd

In [69]:
t = torch.rand(1,5)

In [70]:
t


 0.4924  0.7936  0.5643  0.2805  0.4068
[torch.FloatTensor of size 1x5]

In [71]:
v = autograd.Variable(torch.rand(1,5))

In [72]:
v

Variable containing:
 0.4853  0.3098  0.5509  0.4670  0.5482
[torch.FloatTensor of size 1x5]

Notice that `t` is different than `v`. 

Some important observations:

- The tensor `t` cannot be operated with `x`
- A variable has a `.grad` property (or field) not found in a tensor.
    - This starts as None and then it gets asigned a tensor object after backpropagation is applied to the variable


In [16]:
grad_output = torch.ones(5,1)

In [17]:
a = autograd.Variable(torch.rand(5,1), requires_grad=True)

In [18]:
b = a + 1

In [19]:
b.grad

In [20]:
b.requires_grad

True

In [21]:
b.backward(grad_output)

In [22]:
a.grad

Variable containing:
 1
 1
 1
 1
 1
[torch.FloatTensor of size 5x1]

In [23]:
b.grad

In [24]:
b.backward(grad_output)

In [25]:
a.grad

Variable containing:
 2
 2
 2
 2
 2
[torch.FloatTensor of size 5x1]

In [26]:
a.grad.data.zero_()


 0
 0
 0
 0
 0
[torch.FloatTensor of size 5x1]

In [27]:
a.grad

Variable containing:
 0
 0
 0
 0
 0
[torch.FloatTensor of size 5x1]

In [28]:
b.backward(grad_output)

## torch.Autograd.variable

In [40]:
torch.__version__

'0.2.0_4'

In [45]:
b.__dict__

{}


Autograd: automatic differentiation
===================================

Central to all neural networks in PyTorch is the ``autograd`` package.
Let’s first briefly visit this, and we will then go to training our
first neural network.


The ``autograd`` package provides automatic differentiation for all operations
on Tensors. It is a define-by-run framework, which means that your backprop is
defined by how your code is run, and that every single iteration can be
different.

Let us see this in more simple terms with some examples.

Variable
--------

``autograd.Variable`` is the central class of the package. It wraps a
Tensor, and supports nearly all of operations defined on it. Once you
finish your computation you can call ``.backward()`` and have all the
gradients computed automatically.

You can access the raw tensor through the ``.data`` attribute, while the
gradient w.r.t. this variable is accumulated into ``.grad``.

.. figure:: /_static/img/Variable.png
   :alt: Variable

   Variable

There’s one more class which is very important for autograd
implementation - a ``Function``.

``Variable`` and ``Function`` are interconnected and build up an acyclic
graph, that encodes a complete history of computation. Each variable has
a ``.grad_fn`` attribute that references a ``Function`` that has created
the ``Variable`` (except for Variables created by the user - their
``grad_fn is None``).

If you want to compute the derivatives, you can call ``.backward()`` on
a ``Variable``. If ``Variable`` is a scalar (i.e. it holds a one element
data), you don’t need to specify any arguments to ``backward()``,
however if it has more elements, you need to specify a ``grad_output``
argument that is a tensor of matching shape.



In [47]:
import torch
from torch.autograd import Variable

In [48]:
x = Variable(torch.ones(2, 2), requires_grad=True)
print(x)
y = x + 2
print(y)

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]



``y`` was created as a result of an operation, so it has a ``grad_fn``.


In [49]:
print(y.grad_fn)

<torch.autograd.function.AddConstantBackward object at 0x7f623118e8b8>


Do more operations on y


In [50]:
z = y * y * 3
out = z.mean()

print(z, out)

Variable containing:
 27  27
 27  27
[torch.FloatTensor of size 2x2]
 Variable containing:
 27
[torch.FloatTensor of size 1]



Gradients
---------
let's backprop now
``out.backward()`` is equivalent to doing ``out.backward(torch.Tensor([1.0]))``


In [9]:
out.backward()

In [10]:
print(x.grad)

Variable containing:
 4.5000  4.5000
 4.5000  4.5000
[torch.FloatTensor of size 2x2]



You should have got a matrix of ``4.5``. Let’s call the ``out``
*Variable* “$o$”.
We have that $o = \frac{1}{4}\sum_i z_i$, since
$z_i = 3(x_i+2)^2$ and $z_i\bigr\rvert_{x_i=1} = 27$.
Therefore,

$$\frac{\partial o}{\partial x_i} = \frac{3}{2}(x_i+2)$$ hence
$$\frac{\partial o}{\partial x_i}\bigr\rvert_{x_i=1} = \frac{9}{2} = 4.5$$



In [60]:
x = torch.randn(3)
x = Variable(x, requires_grad=True)

y = x * 2
while y.data.norm() < 1000:
    y = y * 2

print(y)

Variable containing:
 -885.8353
   64.8493
 1190.7087
[torch.FloatTensor of size 3]



In [76]:
help(x.grad_fn)

Help on NoneType object:

class NoneType(object)
 |  Methods defined here:
 |  
 |  __bool__(self, /)
 |      self != 0
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __repr__(self, /)
 |      Return repr(self).



In [101]:
help(x.grad)

Help on NoneType object:

class NoneType(object)
 |  Methods defined here:
 |  
 |  __bool__(self, /)
 |      self != 0
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __repr__(self, /)
 |      Return repr(self).



## Models in torch.nn

In [100]:
?torch.nn.LogSigmoid

In [91]:
fc = torch.nn.Linear(W_target.size(0), 1)

In [99]:
fc.share_memory()

Linear (2 -> 1)

In [None]:
x = torch.Tensor([12.3])

## Polinomial coefficient

Many examples of models in Pytortch can be found here:

- https://github.com/pytorch/examples

In [82]:
import torch
import torch.autograd
import torch.nn.functional as F
from torch.autograd import Variable

In [83]:
POLY_DEGREE = 2
W_target = torch.randn(POLY_DEGREE, 1) * 5
b_target = torch.randn(1) * 5
max_iterations = 10000

In [84]:
print(W_target)


 0.1347
-1.5403
[torch.FloatTensor of size 2x1]



In [85]:
def make_features(x):
    """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]."""
    x = x.unsqueeze(1)
    return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1)


def f(x):
    """Approximated function."""
    return x.mm(W_target) + b_target[0]


def poly_desc(W, b):
    """Creates a string description of a polynomial."""
    result = 'y = '
    for i, w in enumerate(W):
        result += '{:+.2f} x^{} '.format(w, len(W) - i)
    result += '{:+.2f}'.format(b[0])
    return result


def get_batch(batch_size=32):
    """Builds a batch i.e. (x, f(x)) pair."""
    random = torch.randn(batch_size)
    x = make_features(random)
    y = f(x)
    return Variable(x), Variable(y)


# Define model
fc = torch.nn.Linear(W_target.size(0), 1)

for batch_idx in range(max_iterations):
    
    # Get data
    batch_x, batch_y = get_batch()

    # Reset gradients
    fc.zero_grad()

    # Forward pass
    output = F.smooth_l1_loss(fc(batch_x), batch_y)
    loss = output.data[0]

    # Backward pass
    output.backward()

    # Apply gradients
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)

    # Stop criterion
    if loss < 1e-3:
        break

In [86]:
print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
print('==> Learned function:\t' + poly_desc(fc.weight.data.view(-1), fc.bias.data))
print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target))

Loss: 0.000770 after 92 batches
==> Learned function:	y = +0.13 x^2 -1.56 x^1 -3.93
==> Actual function:	y = +0.13 x^2 -1.54 x^1 -3.98
