In [11]:
import torch
import torch.nn as nn

y_pred = torch.tensor([0.655, 0.223, 0.993], requires_grad=True)
y_label = torch.tensor([1, 1, 0], dtype=torch.float32)
n = y_pred.shape[0]

criterion = nn.BCELoss()

loss = criterion(y_pred, y_label)

loss_manual = -torch.mean(y_label * torch.log(y_pred) + (1 - y_label)*torch.log(1 - y_pred))
gradient_manual = - (y_label / y_pred - (1 - y_label) / y_pred) /n

loss.backward()

print("Loss: ", loss.item())
print("Loss Manual: ", loss_manual.item())
print("Gradient: ", y_pred.grad)
print("Gradient Manual", gradient_manual)

Loss:  2.2951815128326416
Loss Manual:  2.2951815128326416
Gradient:  tensor([-0.5089, -1.4948, 47.6189])
Gradient Manual tensor([-0.5089, -1.4948,  0.3357], grad_fn=<DivBackward0>)


In [1]:
import torch
import torch.nn as nn

def mannual_softmax(input):
    return torch.exp(input)/torch.sum(torch.exp(input), dim=1, keepdim=True)

y_logits = torch.tensor([[0.8, 0.4, 0.2],
                         [0.2, 0.5, 0.222],
                         [0.11, 0.5234, 0.3]], requires_grad=True)

y_label = torch.tensor([0, 1, 2])
n = y_logits.shape[0]

soft_max = torch.softmax(y_logits, dim=1)
mannual_soft_max = mannual_softmax(y_logits)
print("softmax: ", soft_max)
print("Mannual softmax: ", mannual_soft_max)

y_prob = soft_max.gather(dim=1, index=y_label.unsqueeze(1)).squeeze()
print("Probability: ", y_prob)

criterion = nn.CrossEntropyLoss()



loss_manual = - torch.mean(torch.log(y_prob))

gradient_manual = soft_max
gradient_manual[range(n), y_label] -= 1
gradient_manual /= n


loss = criterion(y_logits, y_label)
loss.backward()
print("softmax: ", soft_max)
print("Loss: ", loss.item())
# print("Loss Manual: ", loss_manual)

print("Gradient: ", y_logits.grad)
print("gradient_manual", gradient_manual)


softmax:  tensor([[0.4506, 0.3021, 0.2473],
        [0.2966, 0.4003, 0.3031],
        [0.2687, 0.4063, 0.3250]], grad_fn=<SoftmaxBackward0>)
Mannual softmax:  tensor([[0.4506, 0.3021, 0.2473],
        [0.2966, 0.4003, 0.3031],
        [0.2687, 0.4063, 0.3250]], grad_fn=<DivBackward0>)
Probability:  tensor([0.4506, 0.4003, 0.3250], grad_fn=<SqueezeBackward0>)
softmax:  tensor([[-0.1831,  0.1007,  0.0824],
        [ 0.0989, -0.1999,  0.1010],
        [ 0.0896,  0.1354, -0.2250]], grad_fn=<DivBackward0>)
Loss:  0.9455661773681641
Gradient:  tensor([[-0.1831,  0.1007,  0.0824],
        [ 0.0989, -0.1999,  0.1010],
        [ 0.0896,  0.1354, -0.2250]])
gradient_manual tensor([[-0.1831,  0.1007,  0.0824],
        [ 0.0989, -0.1999,  0.1010],
        [ 0.0896,  0.1354, -0.2250]], grad_fn=<DivBackward0>)


In [2]:
# Step 2: 构造 one-hot 标签矩阵
y_one_hot = torch.zeros_like(y_logits)  # shape: [batch, num_classes]
y_one_hot.scatter_(1, y_label.unsqueeze(1), 1.0)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

RuntimeError: scatter(): Expected self.dtype to be equal to src.dtype

In [23]:
# 检验计算图是否被污染
import torch

x = torch.tensor([[2.0, 1.0, 0.1]], requires_grad=True)
y = torch.softmax(x, dim=1)
# y[0, 0] -= 1  # 修改计算图中间变量！

z = y.sum()
z.backward()
print("x.grad:", x.grad)


x.grad: tensor([[-7.8559e-08, -2.8900e-08, -1.1750e-08]])


In [None]:
import torch

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

# # 克隆出来
# x_clone = x.clone()

# # 改变 x_clone 的值（仍然有 grad_fn）
# x_clone[0] = x_clone[0] * 100  # 不 in-place，所以合法

# # 用 clone 参与计算
# y = x_clone.sum()

y = torch.sum(x[0] * 100)
y.backward()

print("x.grad:", x.grad)  


x.grad: tensor([100., 100., 100.])
