### PyTorch Fundamentals
#### 2. Variables and Gradients
##### 2.1 Variables
- A `Variable` wraps a tensor (in PyTorch ≥ 0.4 `Variable` and `Tensor` are merged, so the wrapper simply returns a `Tensor`)
- Allows accumulation of gradients

In [1]:
import torch
from torch.autograd import Variable
# Since PyTorch 0.4 `Variable` and `Tensor` are merged, so gradient tracking is
# requested directly on the factory call instead of wrapping the tensor in the
# deprecated `Variable(...)`. The result is the same kind of object: a leaf
# `torch.Tensor` with `requires_grad=True`.
a = torch.ones(2, 2, requires_grad=True)
print(type(a))
a

<class 'torch.Tensor'>


tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [2]:
# `b` is a second gradient-tracking leaf tensor, created directly with
# `requires_grad=True` (the `Variable` wrapper is deprecated since PyTorch 0.4).
b = torch.ones(2, 2, requires_grad=True)

# The operator form and the functional form are equivalent; each result
# carries a grad_fn recording the operation that produced it.
print(a + b)
print(torch.add(a, b))
print(a * b)
print(torch.mul(a, b))

tensor([[2., 2.],
        [2., 2.]], grad_fn=<AddBackward0>)
tensor([[2., 2.],
        [2., 2.]], grad_fn=<AddBackward0>)
tensor([[1., 1.],
        [1., 1.]], grad_fn=<MulBackward0>)
tensor([[1., 1.],
        [1., 1.]], grad_fn=<MulBackward0>)


### Gradients
#### What exactly is requires_grad?
- Allows calculation of gradients w.r.t. the variable
$y_{i}=5(x_{i}+1)^2$

In [3]:
# Leaf tensor with gradient tracking enabled; equivalent to the deprecated
# `Variable(torch.ones(2), requires_grad=True)`.
x = torch.ones(2, requires_grad=True)
x

tensor([1., 1.], requires_grad=True)

For $x_{i}=1$: $y_{i}=5(1+1)^2=5(4)=20$

In [4]:
# y_i = 5 * (x_i + 1)^2, evaluated element-wise on x
y = 5 * torch.pow(x + 1, 2)
y

tensor([20., 20.], grad_fn=<MulBackward0>)

### Backward should be called only on a scalar (i.e. 1-element tensor) or with gradient with respect to (w.r.t.) the variable
- Let's reduce y to a scalar then...

$ o = 1/2 \sum_{i} y_{i}$

In [5]:
# Reduce y to a scalar so that backward() can be called on it:
# o = (1/2) * sum_i y_i  (1/2 because y has two elements, i.e. the mean of y)
o = 0.5 * y.sum()
o

tensor(20., grad_fn=<MulBackward0>)

In [6]:
# `o` was already computed in the previous cell; recomputing it here was a
# copy-paste duplicate. Instead, check the claim that o is the mean of y,
# then display it.
assert torch.isclose(o, y.mean())
o

tensor(20., grad_fn=<MulBackward0>)

##### Function -> o (mean of the function; 1/2 because there are 2 elements) -> substitution -> symbolic differentiation
<b> Recap y equation: </b> $ y_{i}=5(x_{i} + 1 )^2 $ <br/>
<b> Recap o equation: </b> $ o = (1/2) * \sum_{i} y_{i}$ <br/>
<b> Substitute y into o equation: </b> $ o = (1/2) \sum_{i} 5(x_{i} + 1)^2$

$\frac{\partial o}{\partial x_{i}} = (1/2) [10(x_{i} +1)]$

Now for $x_{i}=1$

$\frac{\partial o}{\partial x_{i}} = (1/2) [10(1 + 1)]= (10/2)(2)=10$

In [7]:
# `o` is the scalar (mean of y) computed above, so backward() needs no arguments.
o.backward()   # backpropagates from o and accumulates d(o)/d(x) into x.grad
x.grad

tensor([10., 10.])

In [8]:
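# Non-scalar alternative (left disabled): backward() on a non-scalar output needs a
# `gradient` tensor of the same shape as that output. `o` is a scalar, so a 2x2
# gradient would not match it; the call belongs on a freshly computed `y` instead:
# y.backward(torch.ones_like(y))
# x.grad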

### Summary
- Variable
    - Wraps a tensor for gradient accumulation
- Gradients
    - Define original equation
    - Substitute equation with x values
    - Reduce to a scalar output `o` by taking the mean
    - Calculate gradients with o.backward()
    - Then access gradients of the x variable through x.grad