# PyTorch can compute gradients automatically.
# In this notebook we will learn how autograd works, step by step.

#################### Auto Grad ######################################
# Why we require auto grad !
""" 
When we do backpropagation we need to calculate the gradient of the loss function w.r.t. the weights.
If we calculated the gradients by hand it would take time and would not be dynamic, because we would have to write
each derivative manually. To resolve this issue, PyTorch can calculate the derivative of a function automatically;
this capability is known as autograd.

"""




In [47]:
import torch 
from torch.autograd import grad 
import torch.nn as nn

# A simplified model of a PyTorch tensor is as an object containing the following properties:
# 1. data — a self-reference: a tensor sharing the same underlying values.
# 2. requires_grad — whether or not this tensor is/should be connected to the computational graph.
# 3. grad — if requires_grad is True, this property holds a tensor that accumulates the gradients computed for this tensor during backward().
# 4. grad_fn — a reference to the most recent operation that generated this tensor. PyTorch performs automatic differentiation by walking the chain of grad_fn nodes.
# 5. is_leaf — whether or not this is a leaf node.

# Simple Derivative

In [35]:
# Let's take a simple example
x=torch.tensor(5.0, requires_grad=True)
x

tensor(5., requires_grad=True)

In [3]:
y=x**2
y

tensor(25., grad_fn=<PowBackward0>)

In [4]:
# Let's verify the gradient by hand:
#   dy/dx = 2*x  --->  2*5 = 10
y.backward()
x.grad

tensor(10.)

# Partial Derivative

In [5]:
# Now lets apply Partial derivative 
x = torch.tensor(5.0,requires_grad=True)
y = torch.tensor(5.0,requires_grad=True)

f = x**2 + y**2

f.backward()
# df/dx = 2*x --- > 2*5 =10
# df/dy = 2*y --- > 2*5 =10

print(f.grad_fn)
print(x.grad)
print(y.grad)

# Here x and y do not depend on each other: they are only added together,
# so each partial derivative is independent of the other variable.
# But what if they are multiplied?

<AddBackward0 object at 0x00000198F8AD4908>
tensor(10.)
tensor(10.)


In [7]:
x

tensor(5., requires_grad=True)

In [8]:
x = torch.tensor(5.0,requires_grad=True)
y = torch.tensor(5.0,requires_grad=True)

f = x**2 * y**2

f.backward()
# df/dx = 2*x*y^2 --- > 2*5*25 = 250
# df/dy = 2*y*x^2 --- > 2*5*25 = 250

print(f.grad_fn)
print(x.grad)
print(y.grad)

<MulBackward0 object at 0x00000198F8ACBE48>
tensor(250.)
tensor(250.)


# Nth derivative

In [40]:
# Let's compute higher-order (nth) derivatives
def nth_derivative(f, wrt, n):
    """Return the n-th derivative of ``f`` with respect to ``wrt``.

    Differentiates repeatedly with ``torch.autograd.grad``. Every pass is run
    with ``create_graph=True`` so that the resulting gradient can itself be
    differentiated on the next pass. The gradient is summed before the next
    pass so that the quantity being differentiated is a scalar again.

    Each pass prints the gradient and its sum.

    Args:
        f: Tensor to differentiate (the notebook passes a scalar built
            from ``wrt``).
        wrt: Tensor to differentiate with respect to.
        n: Order of the derivative; must be >= 0.

    Returns:
        The gradient tensor produced by the last pass, or ``f`` itself
        when ``n`` is 0 (the zeroth derivative).

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    # Seeding with f makes n == 0 well defined; previously the final
    # `return grads` hit an unbound local when the loop never ran.
    grads = f
    for _ in range(n):
        grads = grad(f, wrt, create_graph=True)[0]
        print(f"Grads : {grads}")
        f = grads.sum()
        print(f"Grad Sum : {f}")

    return grads

x = torch.tensor(5.0,requires_grad=True)
print(f"X : {x}")
f = x**2 + x**3
print(nth_derivative(f=f, wrt=x, n=4))

X : 5.0
Grads : 85.0
Grad Sum : 85.0
Grads : 32.0
Grad Sum : 32.0
Grads : 6.0
Grad Sum : 6.0
Grads : 0.0
Grad Sum : 0.0
tensor(0.)


print(nth_derivative(f=f, wrt=x, n=1))

In [11]:
print(nth_derivative(f=f, wrt=x, n=2))

tensor(32., grad_fn=<AddBackward0>)


In [12]:
print(nth_derivative(f=f, wrt=x, n=4))

tensor(0.)


# Derivative of a tensor

In [17]:
# Derivative on a tensor
# Calling backward() directly on a non-scalar output raises an error, because the gradient is only created implicitly for scalar values.
x = torch.tensor([5.0,4.0,3.0],requires_grad=True)
x

tensor([5., 4., 3.], requires_grad=True)

In [18]:
f=x**2
f

tensor([25., 16.,  9.], grad_fn=<PowBackward0>)

In [20]:
f.backward()
x.grad

RuntimeError: grad can be implicitly created only for scalar outputs

In [21]:
# To calculate the gradient of a non-scalar tensor, we first need to reduce it to a scalar (e.g. with sum()).
x = torch.tensor([5.0,4.0,3.0],requires_grad=True)
x

tensor([5., 4., 3.], requires_grad=True)

In [22]:
f=x**2
f

tensor([25., 16.,  9.], grad_fn=<PowBackward0>)

In [23]:
f.sum()

tensor(50., grad_fn=<SumBackward0>)

In [25]:
f.sum().backward()

In [26]:
x.grad

tensor([10.,  8.,  6.])

In [27]:
x = torch.tensor([5.0,4.0,3.0],requires_grad=True)
y = torch.tensor([5.0,4.0,3.0],requires_grad=True)
x

tensor([5., 4., 3.], requires_grad=True)

In [28]:
y

tensor([5., 4., 3.], requires_grad=True)

In [29]:
f = (x**2)*(y)
f

tensor([125.,  64.,  27.], grad_fn=<MulBackward0>)

In [30]:
f.backward()

RuntimeError: grad can be implicitly created only for scalar outputs

In [32]:
f.sum().backward()

In [33]:
y.grad

tensor([25., 16.,  9.])

In [34]:
x.grad

tensor([50., 32., 18.])

# Actual Implementation Of Back propagation

In [41]:
# Actual backpropagation mimic
## Weights and bias of a network 
w1=torch.tensor(3.0, requires_grad=True)
w1

tensor(3., requires_grad=True)

In [42]:
w2=torch.tensor(5.0, requires_grad=True)
w2

tensor(5., requires_grad=True)

In [44]:
bias=torch.tensor(1.0, requires_grad=True)
bias

tensor(1., requires_grad=True)

In [46]:
# Input
x1=torch.tensor(3.0, requires_grad=False)
x2=torch.tensor(5.0, requires_grad=False)

In [48]:
## equation
y=w1*x1 + w2*x2 + bias

In [49]:
## Activation function 
### try changing it with Sigmoid 
z=nn.ReLU()(y)
print(z)

tensor(35., grad_fn=<ReluBackward0>)


In [50]:
## back propagation 
z.backward()

In [51]:
print(z)

tensor(35., grad_fn=<ReluBackward0>)


In [52]:
print(w1.grad)

tensor(3.)


# Complex Implementation Of Back propagation

In [53]:
# Weights
weights=torch.tensor([[1.0,2.0,3.0],[2.0,3.0,4.0],[5.0,2.0,1.0]],requires_grad=True)
weights

tensor([[1., 2., 3.],
        [2., 3., 4.],
        [5., 2., 1.]], requires_grad=True)

In [54]:
bias=torch.ones((3,3),requires_grad=False)
bias

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [55]:
# inputs
inputs=torch.randn((3,3), requires_grad=False)
inputs

tensor([[-2.0324, -0.5222,  0.4357],
        [ 0.3314,  0.3653,  0.8026],
        [-0.1110,  0.1594,  1.0076]])

In [56]:
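# NOTE: `*` is element-wise multiplication, not a dot/matrix product; use `weights @ inputs` (torch.matmul) if a true matrix product of the 2D tensors is intended.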
y=weights*inputs + bias
y

tensor([[-1.0324, -0.0444,  2.3071],
        [ 1.6627,  2.0958,  4.2104],
        [ 0.4449,  1.3189,  2.0076]], grad_fn=<AddBackward0>)

In [58]:
# Activation Function 
z = nn.ReLU()(y)
z

tensor([[0.0000, 0.0000, 2.3071],
        [1.6627, 2.0958, 4.2104],
        [0.4449, 1.3189, 2.0076]], grad_fn=<ReluBackward0>)

In [59]:
## back propagation 
### Refer Tensor derivative section 
z.backward()

RuntimeError: grad can be implicitly created only for scalar outputs

In [60]:
z.sum().backward()

In [61]:
print(weights.grad)

tensor([[-0.0000, -0.0000,  0.4357],
        [ 0.3314,  0.3653,  0.8026],
        [-0.1110,  0.1594,  1.0076]])
