In [None]:
import torch

# Single linear layer + BCE-with-logits loss, used to show how autograd
# fills in .grad on the trainable tensors after loss.backward().

# Seed the global RNG so the random parameters and labels below (and therefore
# the printed gradients) are identical on every Restart & Run All.
torch.manual_seed(0)

# 1. Create the tensors.
#    Only w and b are created with requires_grad=True, so they are the leaf
#    tensors that receive a .grad; x is a constant input and is not tracked.
x = torch.ones(5)  # input tensor (no gradient tracked)
w = torch.randn(5, 3, requires_grad=True)  # weight parameter
b = torch.randn(3, requires_grad=True)     # bias parameter

# Create a target variable (random binary labels for demonstration)
target = torch.randint(0, 2, (3,)).float()  # random 0/1 values, cast to float for the loss

# 2. Forward pass; PyTorch records the computation graph as the ops execute
z = torch.matmul(x, w) + b  # linear transformation -> 3 raw logits
# The loss is given the raw logits z, not probabilities.
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, target)

# loss.grad_fn is the autograd backward node recorded for the op that produced
# loss (not the loss function itself).
print(f"Gradient function for loss = {loss.grad_fn}")

# 3. Backpropagation: populates w.grad and b.grad
loss.backward()

# 4. View the calculated gradients.
#    Because x is all ones, every row of w.grad equals b.grad.
print(f"Gradient for w = {w.grad}")
print(f"Gradient for b = {b.grad}")
print(f"Target used: {target}")

Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x780f547348b0>
Gradient for w = tensor([[-0.0731,  0.0027, -0.0074],
        [-0.0731,  0.0027, -0.0074],
        [-0.0731,  0.0027, -0.0074],
        [-0.0731,  0.0027, -0.0074],
        [-0.0731,  0.0027, -0.0074]])
Gradient for b = tensor([-0.0731,  0.0027, -0.0074])
Target used: tensor([1., 0., 1.])


In [1]:
import torch
import torch.nn.functional as F

# Two-layer network (5 -> 4 -> 3) with a sigmoid hidden activation, used to
# show autograd propagating gradients back through more than one layer.

# Seed the global RNG so the random parameters and labels below (and therefore
# the printed gradients) are identical on every Restart & Run All.
torch.manual_seed(0)

# 1. Create the tensors.
#    Only the weights and biases are created with requires_grad=True, so they
#    are the leaf tensors that receive a .grad; x is a constant input.
x = torch.ones(5)  # input tensor (no gradient tracked)

# First layer parameters (5 → 4 hidden units)
w1 = torch.randn(5, 4, requires_grad=True)
b1 = torch.randn(4, requires_grad=True)

# Second layer parameters (4 → 3 output units)
w2 = torch.randn(4, 3, requires_grad=True)
b2 = torch.randn(3, requires_grad=True)

# Target variable: random 0/1 labels, cast to float for the loss
target = torch.randint(0, 2, (3,)).float()

# 2. Forward propagation (two layers with sigmoid activation)

# Layer 1: linear
z1 = torch.matmul(x, w1) + b1

# Sigmoid activation on the hidden pre-activations
a1 = torch.sigmoid(z1)

# Layer 2: linear, producing 3 raw logits
z2 = torch.matmul(a1, w2) + b2

# Loss (BCE with logits expects raw logits, so no sigmoid is applied to z2 here)
loss = F.binary_cross_entropy_with_logits(z2, target)

# loss.grad_fn is the autograd backward node recorded for the op that produced loss
print(f"Gradient function for loss = {loss.grad_fn}")

# 3. Backpropagation: populates .grad on w1, b1, w2 and b2
loss.backward()

# 4. View gradients.
#    Because x is all ones, every row of w1.grad equals b1.grad.
print(f"Gradient for w1 = {w1.grad}")
print(f"Gradient for b1 = {b1.grad}")
print(f"Gradient for w2 = {w2.grad}")
print(f"Gradient for b2 = {b2.grad}")
print(f"Target used: {target}")

Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7d2fe0fb9690>
Gradient for w1 = tensor([[-0.0318,  0.0358,  0.0994, -0.0200],
        [-0.0318,  0.0358,  0.0994, -0.0200],
        [-0.0318,  0.0358,  0.0994, -0.0200],
        [-0.0318,  0.0358,  0.0994, -0.0200],
        [-0.0318,  0.0358,  0.0994, -0.0200]])
Gradient for b1 = tensor([-0.0318,  0.0358,  0.0994, -0.0200])
Gradient for w2 = tensor([[-0.1261, -0.2682, -0.1226],
        [-0.0098, -0.0208, -0.0095],
        [-0.0320, -0.0682, -0.0311],
        [-0.0099, -0.0211, -0.0096]])
Gradient for b2 = tensor([-0.1497, -0.3184, -0.1455])
Target used: tensor([1., 1., 1.])
