##### Autograd
Autograd automatically calculates derivatives, which are essential for optimization algorithms like gradient descent.


In [2]:

def dy_dx(x):
    """Return ``2 * x``, the hand-derived derivative of ``y = x**2`` at ``x``."""
    slope = 2 * x
    return slope

dy_dx(2)

4

In [3]:
dy_dx(3)

6

In [4]:
import math

def dz_dx(x):
    """Return ``2 * x * cos(x**2)``, the hand-derived derivative of ``z = sin(x**2)``."""
    squared = x ** 2
    # Chain rule: d/dx sin(x**2) = cos(x**2) * 2x
    return 2 * x * math.cos(squared)

print(dz_dx(2))

-2.6145744834544478


In [5]:
# Example 1
import torch 

x = torch.tensor(3.0, requires_grad = True) # Requires Gradient

In [6]:
y = x ** 2

In [7]:
x

tensor(3., requires_grad=True)

In [8]:
y

tensor(9., grad_fn=<PowBackward0>)

In [9]:
y.backward() # Backpropagate from y: computes dy/dx and stores it in x.grad

In [10]:
x.grad # Holds dy/dx evaluated at x = 3, i.e. 2 * 3 = 6

tensor(6.)

In [11]:
# Example 2
# y = x ** 2, z = sin(y)

import torch 

x = torch.tensor(3.0, requires_grad = True)

y = x ** 2

In [12]:
z = torch.sin(y)

In [13]:
print(f"x is : {x}.\n y is : {y}.\n z is : {z}.")

x is : 3.0.
 y is : 9.0.
 z is : 0.41211849451065063.


In [14]:
z.backward()

In [15]:
x.grad

tensor(-5.4668)

In [25]:
# Example 3 
# Perceptron for CGPA and placement prediction 
import torch 

x = torch.tensor(6.7) # Input feature
y = torch.tensor(0.0) # Ground truth label (used as the loss target)

w = torch.tensor(1.0) # Weight
b = torch.tensor(0.0) # Bias


In [17]:
# Binary Cross Entropy Loss for scalar
def binary_cross_entropy_loss(prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Binary cross-entropy between a predicted probability and its target.

    Computes ``-(target * log(p) + (1 - target) * log(1 - p))`` element-wise,
    where ``p`` is ``prediction`` clamped to ``[epsilon, 1 - epsilon]`` so that
    neither logarithm is ever evaluated at 0.

    Args:
        prediction: Tensor of predicted probabilities (e.g. a sigmoid output).
        target: Tensor of labels, broadcastable against ``prediction``.
            Binary cross-entropy is only meaningful for targets in [0, 1].

    Returns:
        Tensor with the element-wise loss; no reduction is applied.
    """
    epsilon = 1e-8  # To prevent log(0)
    if prediction.is_floating_point():
        # 1 - 1e-8 rounds to exactly 1.0 in float32, which would make the upper
        # clamp a no-op and let log(1 - prediction) hit log(0) (inf / nan loss).
        # Never go below the dtype's machine epsilon so the bound is representable.
        epsilon = max(epsilon, torch.finfo(prediction.dtype).eps)
    prediction = torch.clamp(prediction, epsilon, 1 - epsilon)
    return -(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))

In [20]:
# Forward pass
z = w * x + b
y_pred = torch.sigmoid(z)

# Compute Binary Cross-Entropy Loss
loss = binary_cross_entropy_loss(y_pred, y)

In [22]:
loss

tensor(6.7012)

In [23]:
# Manual gradients via the chain rule:
#   dL/dw = dL/dy_pred * dy_pred/dz * dz/dw   (and likewise for b)
# 1. dL/dy_pred: derivative of the binary cross-entropy loss w.r.t. the prediction (y_pred)
dloss_dy_pred = (y_pred - y)/(y_pred * (1 - y_pred))

# 2. dy_pred/dz: derivative of the sigmoid, sigmoid(z) * (1 - sigmoid(z))
dy_pred_dz = y_pred * (1-y_pred)

# 3. dz/dw and dz/db: since z = w * x + b, dz/dw = x and dz/db = 1
dz_dw = x
dz_db = 1

# Multiply the three factors to get the gradients of the loss w.r.t. w and b
dL_dw = dloss_dy_pred * dy_pred_dz * dz_dw
dL_db = dloss_dy_pred * dy_pred_dz * dz_db

In [24]:
# Report the hand-derived gradients so they can be compared with autograd's values.
print(f"Manual Gradient of loss w.r.t weight (dw) : {dL_dw}")
print(f"Manual Gradient of loss w.r.t bias (db) : {dL_db}")

Manual radiant of loss w.r.t weight (dw) : 6.691762447357178
Manual Gradient of loss w.r.t bias (db) : 0.998770534992218


In [26]:
# Now let's do the same with torch 

x = torch.tensor(6.7)
y = torch.tensor(0.0)

In [27]:
w = torch.tensor(1.0, requires_grad = True)
b = torch.tensor(0.0, requires_grad=True)

In [28]:
w

tensor(1., requires_grad=True)

In [29]:
b

tensor(0., requires_grad=True)

In [30]:
z = w * x + b

In [33]:
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [36]:
loss = binary_cross_entropy_loss(y_pred, y)
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [37]:
loss.backward()

In [42]:
print(w.grad)
print(b.grad)

tensor(6.6918)
tensor(0.9988)


In [43]:
# Another example

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
x

tensor([1., 2., 3.], requires_grad=True)

In [44]:
y = (x**2).mean()
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [45]:
y.backward()

In [None]:
x.grad # One gradient per element of x: d(mean(x**2))/dx_i = 2 * x_i / 3

tensor([0.6667, 1.3333, 2.0000])

In [64]:
# Clearing gradients: if we do not clear the gradients, they accumulate across successive backward() calls.
x = torch.tensor(2.0, requires_grad =True)
x

tensor(2., requires_grad=True)

In [66]:
y = x ** 2
y

tensor(4., grad_fn=<PowBackward0>)

In [67]:
y.backward()

In [68]:
x.grad

tensor(4.)

In [69]:
x.grad.zero_() # doing inplace changes and making the grad = 0

tensor(0.)

In [None]:
# There are a few ways of disabling gradient tracking when it is not needed. In particular, we do not need to track gradients while testing the model, as we won't be doing backpropagation to update the weights.
# If we do not turn off the tracking, it will consume a lot of memory when we deal with larger tensors during model testing.

# Option 1 : requires_grad_(False)
# Option 2 : detach()
# Option 3 : torch.no_grad()

In [80]:
x = torch.tensor(2.0, requires_grad = True)
# NOTE(review): y evaluates to 4.0 here and is later passed as the `target` of
# binary_cross_entropy_loss, which produces the negative loss shown below (-5.8731).
# A BCE target normally lies in [0, 1] — confirm whether a label such as
# torch.tensor(0.0) was intended instead.
y = x ** 2

w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)

In [82]:
z = w * x + b
z

tensor(2., grad_fn=<AddBackward0>)

In [84]:
y_pred = torch.sigmoid(z)
y_pred

tensor(0.8808, grad_fn=<SigmoidBackward0>)

In [86]:
loss = binary_cross_entropy_loss(y_pred, y)
loss

tensor(-5.8731, grad_fn=<NegBackward0>)

In [87]:
loss.backward()

In [88]:
x = torch.tensor(2.0, requires_grad = True)
x


tensor(2., requires_grad=True)

In [89]:
with torch.no_grad():
    y = x ** 2


In [90]:
y

tensor(4.)

In [91]:
# y was computed inside torch.no_grad(), so it carries no grad_fn and backward()
# is expected to fail. Catch and display the error so the demonstration is
# preserved while the notebook still completes under Restart & Run All.
try:
    y.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn