In [1]:
# By default, PyTorch accumulates gradients: every backward() call ADDS to
# the existing .grad of a leaf tensor instead of overwriting it.
import torch

# Parameter being differentiated.
w = torch.tensor([2.0], requires_grad=True)

# One training example: input and target.
x = torch.tensor([1.0])
y_true = torch.tensor([4.0])

# The optimizer is constructed but never stepped in this cell, so w stays at
# 2.0 and each backward pass contributes the same gradient:
# d/dw (w*x - y_true)^2 = 2 * (w*x - y_true) * x = -4.
optimizer = torch.optim.SGD(params=[w], lr=0.1)

# Run forward + backward twice WITHOUT zeroing w.grad in between.
# The first pass leaves w.grad = -4; the second adds another -4, giving -8.
for message in (
    "After first backward, w.grad =",
    "After second backward, w.grad =",
):
    y_pred = w * x
    loss = (y_pred - y_true).pow(2)
    loss.backward()
    print(message, w.grad)


After first backward, w.grad = tensor([-4.])
After second backward, w.grad = tensor([-8.])


In [3]:
import torch

# Parameter to be trained.
w = torch.tensor([2.0], requires_grad=True)

# One training example: input and target.
x = torch.tensor([1.0])
y_true = torch.tensor([4.0])

# Plain SGD over the single parameter.
optimizer = torch.optim.SGD(params=[w], lr=0.1)

# Two consecutive training steps. Gradients are cleared at the start of each
# step, so the printed w.grad reflects only that step's backward pass
# (-4 for the first step, then -3.2 after w has moved from 2.0 to 2.4).
for report_label in ("Step 1 - w.grad:", "Step 2 - w.grad:"):
    optimizer.zero_grad()            # 1. clear gradients left from the previous step
    y_pred = w * x                   # 2. forward pass
    loss = (y_pred - y_true).pow(2)  # 3. squared-error loss
    loss.backward()                  # 4. backward pass: populate w.grad
    print(report_label, w.grad)      #    show the gradient computed in this step
    optimizer.step()                 # 5. update w using the gradient


Step 1 - w.grad: tensor([-4.])
Step 2 - w.grad: tensor([-3.2000])


In [None]:
# The * operator unpacks a tuple or list into separate positional arguments
# at the call site.
def f(a, b):
    """Print both arguments on one line, separated by a single space."""
    print(a, b, sep=" ")


pair = 1, 2

# Unpacking call: equivalent to f(1, 2).
f(*pair)



In [None]:
# Column index to pick from each row of y_hat.
y = torch.tensor([0, 2])
# Two rows of three values; each row sums to 1.
y_hat = torch.tensor([
    [0.1, 0.3, 0.6],
    [0.3, 0.2, 0.5],
])
# Integer-array indexing: the i-th row index is paired with the i-th entry
# of y, selecting one element per row.
rows = [0, 1]
y_hat[rows, y]

# Equivalent to:
# [
#   y_hat[0, y[0]],  # = y_hat[0, 0] = 0.1
#   y_hat[1, y[1]]   # = y_hat[1, 2] = 0.5
# ]
# Result:
# tensor([0.1000, 0.5000])