In [1]:
import torch

In [2]:
# Scalar leaf tensor; requires_grad=True asks autograd to record operations on it.
x = torch.tensor(3.0, requires_grad=True)

In [3]:
# y = x^2, recorded in the autograd graph as a power node.
y = x.pow(2)

In [4]:
# Show the leaf tensor and the derived tensor, one per line.
print(x, y, sep="\n")

tensor(3., requires_grad=True)
tensor(9., grad_fn=<PowBackward0>)


In [5]:
# Backpropagate from y; autograd accumulates dy/dx into x.grad.
y.backward()

In [6]:
# dy/dx = 2x, which is 6 at x = 3.
x.grad

tensor(6.)

In [15]:
# New leaf tensor for the two-step chain x -> y -> z.
x = torch.tensor(4.0, requires_grad=True)

In [16]:
# First link of the chain: y = x^2.
y = x.pow(2)

In [17]:
# Second link of the chain: z = sin(y) = sin(x^2).
z = y.sin()

In [18]:
# Show every tensor in the chain, one per line.
print(x, y, z, sep="\n")

tensor(4., requires_grad=True)
tensor(16., grad_fn=<PowBackward0>)
tensor(-0.2879, grad_fn=<SinBackward0>)


In [19]:
# Backpropagate from z through sin and the square down to x.
z.backward()

In [20]:
# Chain rule: dz/dx = cos(x**2) * 2x, evaluated at x = 4.
x.grad

tensor(-7.6613)

# Example: gradients of a binary cross-entropy loss — manual chain rule vs. autograd

In [22]:
# inputs
x = torch.tensor(6.7)  # input feature
y = torch.tensor(0.0)  # target, as a float so the loss arithmetic stays in floating point

# parameters: float dtype, since integer tensors cannot carry gradients
w = torch.tensor(1.0)  # weight
b = torch.tensor(0.0)  # bias

In [23]:
# binary cross-entropy loss for a scalar prediction
def binary_cross_entropy(prediction, target, epsilon=1e-8):
  """Return the binary cross-entropy between `prediction` and `target`.

  Args:
    prediction: tensor holding the predicted probability.
    target: tensor (or number) holding the true label.
    epsilon: minimum distance from 0 and 1 that `prediction` is clamped to,
      so that neither logarithm is evaluated at 0.

  Returns:
    Tensor equal to -target*log(p) - (1-target)*log(1-p), where p is the
    clamped prediction.
  """
  # In float32, 1 - 1e-8 rounds to exactly 1.0, which would turn the upper
  # clamp into a no-op and let log(1 - prediction) reach -inf. Never clamp
  # tighter than the machine epsilon of the prediction's dtype.
  if prediction.is_floating_point():
    epsilon = max(epsilon, torch.finfo(prediction.dtype).eps)
  prediction = torch.clamp(prediction, epsilon, 1-epsilon)
  return -target*torch.log(prediction) - (1-target)*torch.log(1-prediction)

In [27]:
# forward pass
z = w * x + b
y_pred = torch.sigmoid(z)

# compute binary cross-entropy loss on the prediction computed just above
# (the original passed an undefined name `prediction` here)
loss = binary_cross_entropy(y_pred, y)

In [28]:
# Display the binary cross-entropy loss of the forward pass.
loss

tensor(6.7012)

In [29]:
# Manual backward pass via the chain rule:
#   dL/dw = dL/dy_pred * dy_pred/dz * dz/dw   (and likewise for b)

# dL/dy_pred: derivative of the binary cross-entropy w.r.t. the prediction
dloss_dy_pred = (y_pred - y) / (y_pred * (1 - y_pred))

# dy_pred/dz: derivative of the sigmoid w.r.t. its input
dy_pred_dz = y_pred * (1 - y_pred)

# local derivatives of z = w*x + b
dz_dw = x
dz_db = 1

# shared upstream factor dL/dz, then one multiplication per parameter
dloss_dz = dloss_dy_pred * dy_pred_dz
dloss_dw = dloss_dz * dz_dw
dloss_db = dloss_dz * dz_db

In [30]:
# Report both hand-derived gradients, one per line.
print(
    f"Manual gradient of loss w.r.t. w: {dloss_dw}",
    f"Manual gradient of loss w.r.t. b: {dloss_db}",
    sep="\n",
)

Manual gradient of loss w.r.t. w: 6.691762447357178
Manual gradient of loss w.r.t. b: 0.998770534992218


In [34]:
# Data: plain tensors, no gradient tracking needed.
x = torch.tensor(6.7)  # input feature
y = torch.tensor(0.0)  # target

# Parameters: leaf tensors flagged for gradient tracking.
w = torch.tensor(1.0).requires_grad_()  # weight
b = torch.tensor(0.0).requires_grad_()  # bias

In [35]:
# Inspect the parameters; both should report requires_grad=True.
w,b

(tensor(1., requires_grad=True), tensor(0., requires_grad=True))

In [36]:
# Linear step of the forward pass; z gets a grad_fn because w and b require gradients.
z = w*x + b
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [37]:
# Squash the linear output into a probability with the sigmoid.
y_pred = z.sigmoid()
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [40]:
# Binary cross-entropy of the prediction against the target; still attached to the autograd graph.
loss = binary_cross_entropy(y_pred, y)
loss

tensor(6.7012, grad_fn=<SubBackward0>)

In [41]:
# Backpropagate from the loss; autograd fills w.grad and b.grad.
loss.backward()

In [42]:
# dL/dw from autograd; compare with the hand-derived dloss_dw.
w.grad

tensor(6.6918)

In [43]:
# dL/db from autograd; compare with the hand-derived dloss_db.
b.grad

tensor(0.9988)