**AutoGrad**

---

*Tensors form a computational graph*

In [2]:
import torch
import numpy as np

In [11]:
# Unless told otherwise, a freshly created tensor is excluded from gradient tracking
x = torch.tensor(1.0)
# Passing requires_grad=True opts the tensor in to gradient tracking
w = torch.tensor(0.5, requires_grad=True)

print('x.requires_grad:', x.requires_grad)
print('x:', x)
print('w:', w)

# Report the autograd-related attributes of w one by one:
#   data, grad, requires_grad, grad_fn and is_leaf
for _attr in ('data', 'grad', 'requires_grad', 'grad_fn', 'is_leaf'):
    print(f'w.{_attr}:', getattr(w, _attr))

x.requires_grad: False
x: tensor(1.)
w: tensor(0.5000, requires_grad=True)
w.data: tensor(0.5000)
w.grad: None
w.requires_grad: True
w.grad_fn: None
w.is_leaf: True


In [20]:
# A minimal computational graph built from two trainable leaf tensors
a = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)

# The autograd-related attributes reported for every tensor in this cell
_ATTRS = ('data', 'grad', 'requires_grad', 'grad_fn', 'is_leaf')

# State of the leaf a before anything has been computed from it
for _attr in _ATTRS:
    print(f'a.{_attr}:', getattr(a, _attr))

print('')

# c is the result of operations on a and b, so it carries a grad_fn and is
# not a leaf; its .grad is not populated by the backward pass.
c = 2 * a + b
for _attr in _ATTRS:
    print(f'c.{_attr}:', getattr(c, _attr))

print('')

# Backward pass from c: the leaves that require grad (a and b) receive
# their gradients automatically (dc/da = 2, dc/db = 1).
c.backward()
for _index, (_label, _tensor) in enumerate((('a', a), ('b', b))):
    if _index:
        print('')   # blank line between the two reports
    for _attr in _ATTRS:
        print(f'{_label}.{_attr}:', getattr(_tensor, _attr))


a.data: tensor(2.)
a.grad: None
a.requires_grad: True
a.grad_fn: None
a.is_leaf: True

c.data: tensor(7.)
c.grad: None
c.requires_grad: True
c.grad_fn: <AddBackward0 object at 0x7fb5329936a0>
c.is_leaf: False

a.data: tensor(2.)
a.grad: tensor(2.)
a.requires_grad: True
a.grad_fn: None
a.is_leaf: True

b.data: tensor(3.)
b.grad: tensor(1.)
b.requires_grad: True
b.grad_fn: None
b.is_leaf: True


  print('c.grad:',c.grad)


In [18]:
# A slightly larger graph: only a is trainable, b is a plain constant
a = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=False)

c = a * b   # intermediate (non-leaf) node
d = a + c   # output node, d = a + a*b

# The autograd-related attributes reported for every tensor in this cell
_ATTRS = ('data', 'grad', 'requires_grad', 'grad_fn', 'is_leaf')

# Before the backward pass: the leaf a and the two derived tensors c and d
for _label, _tensor in (('a', a), ('c', c), ('d', d)):
    for _attr in _ATTRS:
        print(f'{_label}.{_attr}:', getattr(_tensor, _attr))
    print('')

print('After backward pass...')
# Gradients are computed for tensors with requires_grad=True, but they are
# only stored on leaves; non-leaf tensors such as c keep .grad as None.
d.backward()

for _index, (_label, _tensor) in enumerate((('a', a), ('b', b), ('c', c))):
    if _index:
        print('')   # blank line between consecutive reports
    for _attr in _ATTRS:
        print(f'{_label}.{_attr}:', getattr(_tensor, _attr))


a.data: tensor(2.)
a.grad: None
a.requires_grad: True
a.grad_fn: None
a.is_leaf: True

c.data: tensor(6.)
c.grad: None
c.requires_grad: True
c.grad_fn: <MulBackward0 object at 0x7fb5329ac6d0>
c.is_leaf: False

d.data: tensor(8.)
d.grad: None
d.requires_grad: True
d.grad_fn: <AddBackward0 object at 0x7fb5b81785b0>
d.is_leaf: False

After backward pass...
a.data: tensor(2.)
a.grad: tensor(4.)
a.requires_grad: True
a.grad_fn: None
a.is_leaf: True

b.data: tensor(3.)
b.grad: None
b.requires_grad: False
b.grad_fn: None
b.is_leaf: True

c.data: tensor(6.)
c.grad: None
c.requires_grad: True
c.grad_fn: <MulBackward0 object at 0x7fb5329945b0>
c.is_leaf: False


  print('c.grad:',c.grad)
  print('d.grad:',d.grad)
  print('c.grad:',c.grad)     # gradient is not stored since it is not a leaf node


In [21]:
# Let's solve for w in y = w * x with gradient descent, where y and x are given below
y = torch.tensor(2.0)
x = torch.tensor(1.0)

w = torch.tensor(0.5, requires_grad=True)
print('w before update:',w)

# Hyper-parameters of this toy optimisation
LEARNING_RATE = 0.1
NUM_STEPS = 1   # try the following for more than 1 iteration

# The loop variable is called `step` (not `iter`) so that the builtin iter()
# is not shadowed for the rest of the notebook session.
for step in range(NUM_STEPS):

  # forward pass
  y_hat = w * x
  print('y_hat:',y_hat)  # this is a result of an operation, so there is grad_fn

  # calculate loss (squared error)
  loss = (y_hat-y) **2
  print('loss:',loss)    # this is a result of an operation, so there is grad_fn

  # backward pass
  #   - for all leaf tensors with requires_grad, the gradients will be calculated
  print('Gradient before back. pass:',w.grad)
  loss.backward()                             # d(loss)/dw = 2*(y_hat-y)*x
  print('Gradient after back. pass:',w.grad)

  # update w
  #   The update must not be recorded by autograd, hence torch.no_grad():
  #     - The in-place form below, used outside torch.no_grad(), is rejected
  #       by PyTorch with a RuntimeError (in-place operation on a leaf tensor
  #       that requires grad).
  #     - The out-of-place form `w = w - LEARNING_RATE * w.grad` rebinds w to
  #       an "intermediate" (non-leaf) tensor whose .grad stays None, so the
  #       next iteration fails with:
  #         TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'
  #     https://www.youtube.com/watch?v=MswxJw-8PvE
  with torch.no_grad():
    w -= LEARNING_RATE * w.grad # in-place operation, w stays a leaf

  print('w after update:',w)
  print('Gradient after update:',w.grad)

  # Zero the gradient - Gradients accumulate in tensors, so zero them.
  w.grad.zero_()

# Note the difference between model.eval() and torch.no_grad()
#   model.eval() (nn.Module.eval): puts all the layers in eval mode, which
#                 changes the behaviour of e.g. batchnorm and dropout layers
#   torch.no_grad(): deactivates the autograd engine (no graph is recorded)


w before update: tensor(0.5000, requires_grad=True)
y_hat: tensor(0.5000, grad_fn=<MulBackward0>)
loss: tensor(2.2500, grad_fn=<PowBackward0>)
Gradient before back. pass: None
Gradient after back. pass: tensor(-3.)
w after update: tensor(0.8000, requires_grad=True)
Gradient after update: tensor(-3.)
