In [1]:
# Import PyTorch and print the installed version so the recorded outputs
# below can be tied to a specific release.
import torch
print(torch.__version__)

2.6.0+cpu


In [2]:
# Report the compute device once: the GPU's name when CUDA is usable,
# otherwise a notice that the CPU will be used.
device_report = (
    f"GPU Available: {torch.cuda.get_device_name()}"
    if torch.cuda.is_available()
    else "No GPU using CPU"
)
print(device_report)

No GPU using CPU


Sample gradient calculation with autograd

In [3]:
# requires_grad=True asks autograd to record the operations applied to x so
# that a later backward() call can populate x.grad.
# Tensors default to requires_grad=False.
x = torch.tensor(3.0, requires_grad=True)

In [4]:
# y = x^2; the result carries a grad_fn because x requires grad.
y = x ** 2

In [5]:
# Stack a second differentiable operation on top of y.
z = torch.sin(y)  # z = sin(y) = sin(x^2)

In [6]:
# Show each tensor in the chain x -> y -> z on its own labelled line.
for label, tensor in (("X: ", x), ("Y: ", y), ("Z: ", z)):
    print(label, tensor)

X:  tensor(3., requires_grad=True)
Y:  tensor(9., grad_fn=<PowBackward0>)
Z:  tensor(0.4121, grad_fn=<SinBackward0>)


In [7]:
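# y.backward()  # would compute dy/dx = 2x
# Gradients are calculated in the backward direction, from the output back to x.
# (Kept commented out: this notebook differentiates z instead.)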

In [8]:
# x.grad  # after y.backward() this would hold dy/dx at x = 3.0, i.e. 2 * 3 = 6

In [9]:
# Backpropagate from z; autograd applies the chain rule and stores the result in x.grad.
z.backward() # dz/dx = dz/dy * dy/dx

In [None]:
x.grad  # dz/dx at x=3.0, i.e. cos(x^2) * 2x = 6 * cos(9)

# By default autograd only stores .grad on leaf tensors, not on intermediates.
# y.grad  # None, because y is not a leaf node

tensor(-5.4668)

Neural Network - Model

In [12]:
# Single-neuron setup for computing the gradients of a loss w.r.t. its parameters.

# Parameters: tracked by autograd so that backward() can fill w.grad and b.grad.
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

# Data: one input and its binary target; neither requires a gradient.
# NOTE: x and y rebind the names used by the earlier sin(x^2) example.
x = torch.tensor(3.0)
y = torch.tensor(0.0)

In [13]:
def forward(x):
    """Apply the affine map f(x) = w * x + b using the notebook-level w and b."""
    scaled = w * x
    return scaled + b

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of a tensor, element-wise.

    Delegates to torch.sigmoid instead of spelling the formula out. With the
    hand-written form, exp(-x) overflows to inf for large negative inputs: the
    forward value still rounds to 0, but the backward pass multiplies 0 by inf
    and yields a NaN gradient.

    Args:
        x: input tensor of any shape.

    Returns:
        Tensor of the same shape holding the sigmoid of each element.
    """
    return torch.sigmoid(x)

def binary_cross_entropy(y_pred, y):
    """Mean binary cross-entropy between predicted probabilities and targets.

    Predictions are clamped into [1e-7, 1 - 1e-7] before the logarithms are
    taken, so log(0) is never evaluated.

    Args:
        y_pred: tensor of predicted probabilities.
        y: tensor of targets, combined with y_pred element-wise.

    Returns:
        Scalar tensor: the mean of -(y * log(p) + (1 - y) * log(1 - p)).
    """
    tiny = 1e-7
    probs = torch.clamp(y_pred, tiny, 1 - tiny)
    positive_term = y * torch.log(probs)
    negative_term = (1 - y) * torch.log(1 - probs)
    return -(positive_term + negative_term).mean()

In [19]:
# Show the current parameters and the input, one labelled line each.
for label, value in (("W: ", w), ("b: ", b), ("x: ", x)):
    print(label, value)

W:  tensor(2., requires_grad=True)
b:  tensor(1., requires_grad=True)
x:  tensor(3.)


In [21]:
# Forward pass for the single training example.
# The linear step goes through forward() so that this cell and the no_grad
# cell further down share one definition of the model.
z = forward(x)  # linear function: w * x + b
y_pred = sigmoid(z)  # activation function
loss = binary_cross_entropy(y_pred, y)  # loss function
loss

tensor(7.0010, grad_fn=<NegBackward0>)

In [22]:
# Backpropagation: computes d(loss)/dw and d(loss)/db and stores them in w.grad and b.grad.
loss.backward()


In [23]:
# The loss value comes from the forward pass; w.grad was populated by loss.backward().
print("Loss before backpropagation: ", loss.item())
print("Gradient w.r.t W: ", w.grad.item())

Loss before backpropagation:  7.000970363616943
Gradient w.r.t W:  2.9974443912506104


Clearing accumulated gradients

In [None]:
# Gradients accumulate: every backward() call ADDS into .grad instead of
# overwriting it, i.e.
#   w.grad += d(loss)/dw
#   b.grad += d(loss)/db
# so they have to be reset before the next backward pass.

# Clear the gradient of every parameter (b as well as w). The None check keeps
# the cell runnable when backward() has not been called yet and .grad is unset.
for param in (w, b):
    if param.grad is not None:
        param.grad.zero_()

In [24]:
# Three ways to switch off gradient tracking

# 1. requires_grad_(False) - flips the flag on the tensor in place
# w.requires_grad_(False)

# 2. detach() - creates a new tensor without gradient tracking
# w_detached = w.detach()

# 3. torch.no_grad() - context manager; results computed inside are not tracked
with torch.no_grad():
    y_pred = sigmoid(forward(x))
    loss = binary_cross_entropy(y_pred, y)

print("Loss without gradient tracking: ", loss.item())

Loss without gradient tracking:  7.000970363616943
