In [21]:
# pip install torch

In [22]:
import torch

# Show the installed PyTorch version so the outputs below can be reproduced.
print(torch.__version__)

2.9.0+cpu


# Calculating Derivative 

In [23]:
# requires_grad=True tells autograd to record the operations applied to x,
# so that gradients with respect to x can be computed later.
x = torch.tensor(2.0, requires_grad=True)


In [24]:
y = x**2 

# For x = 2 this displays tensor(4., grad_fn=<PowBackward0>).
# grad_fn=<PowBackward0> is the backward function that computes the gradient of y w.r.t. x.
y

tensor(4., grad_fn=<PowBackward0>)

In [25]:
# finding gradient dy/dx

# backward() is called on the final (scalar) output tensor. It stores dy/dx in
# x.grad and returns None, so there is no return value worth capturing.
# retain_graph=True keeps y's graph alive so backward() could be called on y again.
y.backward(retain_graph=True)


In [26]:
# dy/dx = 2x, so for x = 2 this displays tensor(4.)
x.grad

tensor(4.)

In [27]:
# a = x**4, so da/dx = 4 * (x**3) = 4 * (2**3) = 32 at x = 2.
a = x**4

# backward() ACCUMULATES into x.grad instead of overwriting it. x.grad already
# holds dy/dx = 2x = 4 from the earlier y.backward(), so after this call
# x.grad = dy/dx + da/dx = 2*2 + 4*(2**3) = 36.
a.backward()

In [28]:
# Gradients accumulate across backward() calls, so with y = x**2 and a = x**4
# x.grad now holds dy/dx + da/dx = 2x + 4*(x**3) = 2*2 + 4*(2**3) = 36
x.grad



tensor(36.)

# BINARY CROSS ENTROPY LOSS

In [29]:
# A single training example for the binary-classification demo.
y = torch.tensor(0.0)  # target label (passed to binary_cross_entropy as y)
x = torch.tensor(6.7)  # input value (used in z = w*x + b)

In [30]:
# Trainable parameters: requires_grad=True makes autograd track them, so that
# loss.backward() can fill in w.grad and b.grad.
w = torch.tensor(1.0, requires_grad=True)  # weight
b = torch.tensor(0.0, requires_grad=True)  # bias

In [31]:
# Linear score (logit) that is fed to the sigmoid in the next cell.
z = w*x + b


In [32]:
# The sigmoid maps the score z to a value in (0, 1), used as the predicted probability.
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [33]:
def binary_cross_entropy(y_pred, y, epsilon=1e-7):
    """Element-wise binary cross-entropy between predictions and targets.

    Computes ``-(y * log(p) + (1 - y) * log(1 - p))`` where ``p`` is ``y_pred``
    clamped to ``[epsilon, 1 - epsilon]`` so that neither logarithm is
    evaluated at 0.

    Args:
        y_pred: Tensor of predicted probabilities (e.g. a sigmoid output).
        y: Tensor of targets, broadcastable against ``y_pred``.
        epsilon: Clamp margin that keeps ``p`` away from 0 and 1. Defaults to
            1e-7, the value that was previously hard-coded. It must be large
            enough that ``1 - epsilon`` is still distinguishable from 1 in
            the dtype of ``y_pred``.

    Returns:
        Tensor of per-element losses; no reduction is applied.

    Raises:
        ValueError: If ``epsilon`` is not strictly between 0 and 0.5, since
            the clamp interval would then be empty or would not guard the logs.
    """
    if not 0.0 < epsilon < 0.5:
        raise ValueError(f"epsilon must be in (0, 0.5), got {epsilon!r}")

    # Clamp first so log(p) and log(1 - p) never receive exactly 0.
    p = torch.clamp(y_pred, epsilon, 1 - epsilon)
    return -(y * torch.log(p) + (1 - y) * torch.log(1 - p))


# Loss for the single example: prediction y_pred against target y.
loss = binary_cross_entropy(y_pred, y)


In [34]:
# With y = 0 the loss reduces to -log(1 - y_pred) = -log(1 - 0.9988) ~= 6.70:
# a confident prediction of the wrong class is penalised heavily.
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [35]:
# Computes dL/dw and dL/db and stores them in w.grad and b.grad. Only w and b
# receive gradients because they were created with requires_grad=True (x and y were not).
loss.backward()

In [36]:
# dL/dw = (y_pred - y) * x ~= 0.9988 * 6.7 ~= 6.69
w.grad

tensor(6.6918)

In [37]:
# dL/db = y_pred - y ~= 0.9988
b.grad

tensor(0.9988)

# finding gradient with vector values

In [38]:
# Gradient of a scalar with respect to a vector input.
# y = (x1**2 + x2**2 + x3**2) / 3, hence dy/dx_i = 2 * x_i / 3.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x.pow(2).mean()

# y is a scalar, so backward() needs no explicit gradient argument.
y.backward()

In [39]:
print(y)
print(' <-- --> ')
print(x)
print(' <-- --> ')
# x.grad holds [dy/dx1, dy/dx2, dy/dx3] = 2 * x / 3 = tensor([0.6667, 1.3333, 2.0000])
print(x.grad)

tensor(4.6667, grad_fn=<MeanBackward0>)
 <-- --> 
tensor([1., 2., 3.], requires_grad=True)
 <-- --> 
tensor([0.6667, 1.3333, 2.0000])


# Clearing gradients

In [40]:
# Gradients accumulate across backward() calls, so reset x.grad to zeros (in place)
# before computing a fresh gradient.
x.grad.zero_()

tensor([0., 0., 0.])

# To Pause / Remove Gradient Tracking

In [41]:
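# 1. requires_grad_(False) - switch tracking off on the tensor itself (in place)
# 2. detach()              - get a tensor with the same values that is cut off from the graph
# 3. torch.no_grad()       - context manager; operations inside the block are not tracked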

In [42]:
# 1. In-place switch on x itself. Presumably left commented out so that x keeps
#    requires_grad=True for the two options below.
# x.requires_grad_(False)

# 2. z has the same values as x but is detached from the autograd graph.
z = x.detach()

# 3. Operations inside the no_grad() block are not recorded, so y carries no grad_fn.
with torch.no_grad():
    y = x**3

In [43]:
print(x) # tensor([1., 2., 3.], requires_grad=True) - x itself is still tracked

print(z) # tensor([1., 2., 3.]) - detached tensor, no requires_grad

print(y) # tensor([ 1.,  8., 27.]) - computed under no_grad(), no grad_fn

tensor([1., 2., 3.], requires_grad=True)
tensor([1., 2., 3.])
tensor([ 1.,  8., 27.])
