In [23]:
import torch

# Build the 1-D float tensor [0., 1., 2., 3.]; later cells differentiate with respect to it.
x = torch.arange(4.0)
x  # bare last expression so the notebook displays the tensor

tensor([0., 1., 2., 3.])

In [24]:
# Turn on gradient tracking in place so autograd has somewhere (x.grad) to store gradients.
x.requires_grad_(True)
# No backward pass has run yet, so this is None and the cell displays nothing.
x.grad

In [25]:
# Scalar objective y = 2 * (x . x) = 2 * sum(x_i^2); relies on x defined in an earlier cell.
y = 2 * torch.dot(x,x)
y

tensor(28., grad_fn=<MulBackward0>)

In [26]:
# Back-propagate from the scalar y; autograd stores dy/dx in x.grad.
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [27]:
# Sanity check: analytically d/dx [2 * (x . x)] = 4x, so every element should compare True.
x.grad == 4 * x

tensor([True, True, True, True])

In [35]:
import torch

# Fresh leaf tensor [0., 1., 2., 3.] with gradient tracking enabled from the start.
x = torch.arange(4.0, requires_grad=True)

# Pass 1: y1 = 2 * (x . x); its backward pass writes 4x into x.grad.
# y1 is a non-leaf tensor, so its own .grad is not kept (it stays None).
y1 = 2 * x.dot(x)
y1.backward()

# Gradients accumulate across backward calls, so clear x.grad before the next pass.
x.grad.zero_()

# Pass 2: a brand-new graph, y2 = sum(x).
y2 = torch.sum(x)
# A non-leaf tensor keeps its .grad only if retain_grad() is requested before backward().
y2.retain_grad()

# y2 is already a scalar, so it is used directly as the loss (loss is another name for y2).
loss = y2
loss.backward()

# d(sum(x))/dx is all ones -> tensor([1., 1., 1., 1.])
print(x.grad)
# d(y2)/d(y2): the implicit seed gradient that backward() uses for a scalar -> tensor(1.)
print(y2.grad)

tensor([1., 1., 1., 1.])
tensor(1.)


In [41]:
# Clear the accumulated gradient, then differentiate sum(x * x) with respect to x (result: 2x).
x.grad.zero_()
y = x * x
# backward() needs a scalar here, so the element-wise product is summed first.
y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])

In [42]:
x.grad.zero_()
y = x * x
# Treat y as a constant: detach() gives a tensor with the same values, cut off from the graph.
u = y.detach()
z = u * x
# u carries no gradient history, so dz/dx = u (numerically x * x) rather than 3 * x^2.
z.sum().backward()
x.grad

tensor([0., 1., 4., 9.])

In [43]:
# y (built in the previous cell) is still attached to x; only u was detached.
# Back-propagating through y therefore still yields d(sum(x * x))/dx = 2x.
x.grad.zero_()
y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])

In [47]:
def f(a):
    """Piecewise-linear function of ``a`` built with ordinary Python control flow.

    The input is doubled once, then doubled repeatedly until the norm of the
    running value is at least 1000. The result is returned unchanged when its
    sum is positive and multiplied by 100 otherwise. For any fixed input the
    output is therefore ``k * a`` for a constant ``k`` chosen by the control
    flow, so the gradient with respect to ``a`` equals ``k``.

    Args:
        a: Input tensor (the notebook passes a 0-d tensor).

    Returns:
        The tensor ``c`` described above, still attached to the autograd graph.
    """
    b = a * 2
    # Require a strictly positive norm: doubling zero never reaches 1000, so
    # without the lower bound an all-zero input would loop forever.
    while 0 < b.norm() < 1000:
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c
# Random 0-d (scalar) input with gradient tracking; no seed is set in this cell,
# so the sampled value may differ between runs.
a = torch.randn(size=(),requires_grad=True)
d = f(a)
d.backward()
# f is piecewise linear in a, so the displayed gradient is the constant factor d / a.
a.grad

tensor(1024.)

In [48]:
import torch

# Fix the random seed so the results are reproducible
torch.manual_seed(42)

def f(a):
    """Verbose variant of the control-flow demo that prints each forward step.

    The input is doubled once, then doubled repeatedly until the norm of the
    running value is at least 1000. The result is returned unchanged when its
    sum is positive and multiplied by 100 otherwise. Every step is printed.

    Args:
        a: Scalar (single-element) tensor; ``.item()`` is called on it and on
            every intermediate value.

    Returns:
        tuple: ``(c, iteration, branch_type, branch_factor, final_b)`` where
        ``c`` is the output tensor, ``iteration`` is the number of loop
        doublings, ``branch_type`` is the label of the branch taken,
        ``branch_factor`` is 1 or 100, and ``final_b`` is the Python float
        value of ``b`` after the loop.
    """
    print(f"\n==== 正向传播详细过程 ====")
    print(f"输入 a = {a.item():.6f}, requires_grad = {a.requires_grad}")

    # Step 1: initial doubling.
    b = a * 2
    print(f"步骤 1: b = a * 2 = {a.item():.6f} * 2 = {b.item():.6f}")

    # Step 2: keep doubling, counting the iterations.
    iteration = 0
    print(f"步骤 2: 进入 while 循环，条件: b.norm() < 1000")
    # Require a strictly positive norm: doubling zero never reaches 1000, so
    # without the lower bound a zero input would loop (and print) forever.
    while 0 < b.norm() < 1000:
        iteration += 1
        b_old = b.item()
        b = b * 2
        print(f"  迭代 {iteration}: b = {b_old:.6f} * 2 = {b.item():.6f}, norm = {b.norm().item():.6f}")

    print(f"步骤 3: while 循环结束, 最终 b = {b.item():.6f}, b.norm() = {b.norm().item():.6f}")

    # Step 4: the sign of b selects the branch and therefore the extra factor.
    print(f"步骤 4: 条件判断 b.sum() = {b.sum().item():.6f}")
    if b.sum() > 0:
        c = b
        branch_type = "if分支"
        branch_factor = 1
        print(f"  条件为真，执行 if 分支: c = b = {c.item():.6f}")
    else:
        c = 100 * b
        branch_type = "else分支"
        branch_factor = 100
        print(f"  条件为假，执行 else 分支: c = 100 * b = {c.item():.6f}")

    print(f"步骤 5: 返回 c = {c.item():.6f}")
    return c, iteration, branch_type, branch_factor, b.item()

# Create the tensor whose gradient will be tracked
a = torch.randn(size=(), requires_grad=True)
print(f"初始化随机输入: a = {a.item():.6f}")

# Forward pass
d, iterations, branch_type, branch_factor, final_b = f(a)
print(f"\n最终输出: d = {d.item():.6f}")

# Text rendering of the computation graph
print(f"\n==== 计算图 ====")
print(f"a ({a.item():.6f}) → [*2] → 初始 b")
print(f"初始 b → [*2 重复 {iterations} 次] → 最终 b ({final_b:.6f})")
if branch_factor == 1:
    print(f"最终 b → [直接赋值] → c = d ({d.item():.6f})")
else:
    print(f"最终 b → [*100] → c = d ({d.item():.6f})")

# Header for the backward-pass section; the gradient hook is defined and registered further down
print(f"\n==== 反向传播详细过程 ====")

# To trace the gradient flowing into a tensor, build a hook that reports it.
def grad_hook(name):
    """Return a tensor hook that prints the incoming gradient, labelled with ``name``.

    The returned callable takes the gradient tensor, prints its scalar value
    to six decimal places, and hands the same tensor back.
    """
    def _report(gradient):
        value = gradient.item()
        print(f"节点 {name} 的梯度: {value:.6f}")
        return gradient

    return _report

# Register the hook
a.register_hook(grad_hook("a"))

# Start the backward pass
print(f"启动反向传播 d.backward()，初始梯度 = 1.0")
d.backward()

# Show the final results
print(f"\n==== 梯度计算分析 ====")
print(f"1. 输入值: a = {a.item():.6f}")
print(f"2. 初始操作: b = a * 2 (使 ∂b/∂a = 2)")
print(f"3. while 循环: 每次迭代 b = b * 2, 共 {iterations} 次迭代")
loop_multiplier = 2 ** iterations
print(f"   循环导致的乘数效应: 2^{iterations} = {loop_multiplier}")
total_multiplier = 2 * loop_multiplier  # the initial b = a * 2 multiplied by the effect of the loop
print(f"4. 条件分支: 选择了{branch_type}, 分支因子 = {branch_factor}")
print(f"5. 梯度计算: 1.0 (初始梯度) * {branch_factor} (分支因子) * {total_multiplier} (总乘数) = {branch_factor * total_multiplier}")
print(f"\n最终梯度: a.grad = {a.grad.item():.6f}")

# Manually verify the gradient computation
expected_grad = branch_factor * total_multiplier
print(f"\n验证: 期望的梯度 = {expected_grad}, 实际梯度 = {a.grad.item():.6f}")
print(f"差异: {abs(expected_grad - a.grad.item())}")

初始化随机输入: a = 0.336690

==== 正向传播详细过程 ====
输入 a = 0.336690, requires_grad = True
步骤 1: b = a * 2 = 0.336690 * 2 = 0.673381
步骤 2: 进入 while 循环，条件: b.norm() < 1000
  迭代 1: b = 0.673381 * 2 = 1.346761, norm = 1.346761
  迭代 2: b = 1.346761 * 2 = 2.693523, norm = 2.693523
  迭代 3: b = 2.693523 * 2 = 5.387046, norm = 5.387046
  迭代 4: b = 5.387046 * 2 = 10.774092, norm = 10.774092
  迭代 5: b = 10.774092 * 2 = 21.548183, norm = 21.548183
  迭代 6: b = 21.548183 * 2 = 43.096367, norm = 43.096367
  迭代 7: b = 43.096367 * 2 = 86.192734, norm = 86.192734
  迭代 8: b = 86.192734 * 2 = 172.385468, norm = 172.385468
  迭代 9: b = 172.385468 * 2 = 344.770935, norm = 344.770935
  迭代 10: b = 344.770935 * 2 = 689.541870, norm = 689.541870
  迭代 11: b = 689.541870 * 2 = 1379.083740, norm = 1379.083740
步骤 3: while 循环结束, 最终 b = 1379.083740, b.norm() = 1379.083740
步骤 4: 条件判断 b.sum() = 1379.083740
  条件为真，执行 if 分支: c = b = 1379.083740
步骤 5: 返回 c = 1379.083740

最终输出: d = 1379.083740

==== 计算图 ====
a (0.336690) → [*2] → 初始 