## 均方误差 MSELoss

In [3]:
import torch
import torch.nn as nn

# Targets are plain constants; only the predictions are tracked by autograd
# so that backward() fills in y_pred.grad.
y_true = torch.tensor([3.0, -0.5, 2.0, 7.0])
y_pred = torch.tensor([2.5, 0.0, 2.1, 7.8]).requires_grad_()

# Built-in mean-squared-error criterion (mean reduction is the default).
criterion = nn.MSELoss(reduction="mean")
loss = criterion(input=y_pred, target=y_true)
print("MSE Loss: ", loss.item())

# Back-propagate the scalar loss to obtain d(loss)/d(y_pred).
loss.backward()
print("gradient of y: ", y_pred.grad)


MSE Loss:  0.2875000834465027
gradient of y:  tensor([-0.2500,  0.2500,  0.0500,  0.4000])


In [5]:
# Hand-written MSE: mean of the squared residuals.
# Relies on y_pred / y_true created by the nn.MSELoss cell above.
diff = y_pred - y_true
loss = diff.pow(2).mean()

print("MSE Loss: ", loss.item())

# Analytic gradient of mean((y_pred - y_true)^2) w.r.t. y_pred:
#   2 * (y_pred - y_true) / n
# The result is still attached to the autograd graph, hence the grad_fn in its repr.
n = y_pred.size(0)
grad_manual = (2.0 / n) * diff

print("gradient manual: ", grad_manual)

MSE Loss:  0.2875000834465027
gradient manual:  tensor([-0.2500,  0.2500,  0.0500,  0.4000], grad_fn=<MulBackward0>)


## 交叉熵
二分类：$Loss = - \frac{1}{n}\sum_{i = 1}^{n}[y_i \log(\sigma(x_i)) + (1 - y_i)\log(1 - \sigma(x_i))]$，其中 $x_i$ 为模型输出的 logit，$\sigma$ 为 Sigmoid 函数，$y_i \in \{0, 1\}$ 为真实标签。

In [16]:
import torch
import torch.nn as nn

# Binary targets as float32, and predicted probabilities tracked by autograd.
labels = torch.tensor([1, 0, 1]).float()
probs = torch.tensor([0.668, 0.231, 0.881]).requires_grad_()

# nn.BCELoss consumes probabilities directly (values already in (0, 1)).
criterion = nn.BCELoss(reduction="mean")
loss = criterion(input=probs, target=labels)

print("BCELoss: ", loss.item())

# Gradient of the mean BCE with respect to each probability.
loss.backward()
print("gradient: ", probs.grad)


BCELoss:  0.2642763555049896
gradient:  tensor([-0.4990,  0.4335, -0.3784])


In [8]:
import torch
import torch.nn as nn

# The model is assumed to emit raw logits (no Sigmoid applied yet).
labels = torch.tensor([1.0, 0.0, 1.0])  # shape: [batch_size]
logits = torch.tensor([0.7, -1.2, 2.0]).requires_grad_()  # shape: [batch_size]

# BCEWithLogitsLoss applies the Sigmoid to the logits internally.
criterion = nn.BCEWithLogitsLoss(reduction="mean")
loss = criterion(input=logits, target=labels)

print("Binary Cross Entropy Loss:", loss.item())

# Back-propagate to get d(loss)/d(logits).
loss.backward()
print("logits 的梯度：", logits.grad)


Binary Cross Entropy Loss: 0.2644655108451843
logits 的梯度： tensor([-0.1106,  0.0772, -0.0397])


In [None]:
# Hand-written binary cross-entropy.
# Simulated predicted probabilities (already passed through Sigmoid) and true labels.
y_pred = torch.tensor([0.668, 0.231, 0.881], requires_grad=True)  # predicted probabilities
y_true = torch.tensor([1.0, 0.0, 1.0])  # labels
n = y_pred.shape[0]
epsilon = 1e-7  # keeps log() finite if a probability is exactly 0 or 1
loss = -torch.mean(y_true * torch.log(y_pred + epsilon) + (1 - y_true) * torch.log(1 - y_pred + epsilon))
print("Loss: ", loss.item())

# Analytic gradient of the mean BCE w.r.t. each probability (epsilon ignored):
#   -(y / p - (1 - y) / (1 - p)) / n
gradient_manual = -(y_true/y_pred - (1 - y_true)/(1 - y_pred)) / n
print("Gradient: ", gradient_manual)

# Autograd check: the back-propagated gradient must agree with the formula above.
# The tolerance absorbs the tiny shift introduced by epsilon inside the logs.
loss.backward()
print("Autograd gradient:", y_pred.grad)
assert torch.allclose(y_pred.grad, gradient_manual.detach(), atol=1e-5)

Loss:  0.2642762362957001
Gradient:  tensor([-0.4990,  0.4335, -0.3784], grad_fn=<DivBackward0>)
Autograd gradient: tensor([-0.4990,  0.4335, -0.3784])


In [19]:
import torch
import torch.nn as nn

# Hand-written BCE on raw logits (no Sigmoid applied by the model).
logits = torch.tensor([0.7, -1.2, 2.0], requires_grad=True)  # shape: [batch_size]
labels = torch.tensor([1.0, 0.0, 1.0])  # shape: [batch_size]
n = logits.shape[0]

# No smoothing term is used in this cell.
# NOTE: with epsilon = 0 the logs become -inf once the sigmoid saturates to
# exactly 0 or 1 (very large |logit|); nn.BCEWithLogitsLoss avoids that.
epsilon = 0

# Apply the Sigmoid once and reuse it for both the loss and the manual gradient.
y_pred = torch.sigmoid(logits)

# Use this cell's own `labels` rather than a `y_true` left over from an earlier
# cell, so the cell runs correctly on a fresh kernel.
loss = -torch.mean(labels * torch.log(y_pred + epsilon) + (1 - labels) * torch.log(1 - y_pred + epsilon))

print("Binary Cross Entropy Loss:", loss.item())

# Analytic gradient w.r.t. the logits: (sigmoid(x) - y) / n
gradient_manual = - (labels - y_pred)/n

print("gradient_manual: ", gradient_manual)
# Back-propagate and confirm autograd agrees with the closed-form gradient.
loss.backward()
print("logits 的梯度：", logits.grad)
assert torch.allclose(logits.grad, gradient_manual.detach(), atol=1e-6)

Binary Cross Entropy Loss: 0.2644655704498291
gradient_manual:  tensor([-0.1106,  0.0772, -0.0397], grad_fn=<DivBackward0>)
logits 的梯度： tensor([-0.1106,  0.0772, -0.0397])
