In [2]:
import torch 

# Report the installed PyTorch version and whether CUDA is usable, one per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.7.1
False


In [4]:
# Leaf tensor that autograd tracks: operations on it are recorded in the graph.
tensor_requires_grad = torch.tensor([1.0], requires_grad=True)
print(tensor_requires_grad)

# Double it; the result carries a grad_fn pointing back at the multiplication.
tensor_result = 2 * tensor_requires_grad
print(tensor_result)

# Backpropagate from the single-element result, then show d(result)/d(input),
# which autograd stores on the leaf tensor's .grad attribute.
tensor_result.backward()
print(tensor_requires_grad.grad)


tensor([1.], requires_grad=True)
tensor([2.], grad_fn=<MulBackward0>)
tensor([2.])


In [36]:
import torch.nn as nn

# Tiny network: 2 input features -> 1 output, i.e. y = w₁x₁ + w₂x₂ + b.
# nn.Linear draws its initial weights at random, so the seed is fixed first;
# otherwise the gradients printed below change on every Restart & Run All.
torch.manual_seed(0)
model = nn.Linear(2, 1)

# A single training example. requires_grad=True on the input makes autograd
# return ∂loss/∂input as well as the parameter gradients.
inputs = torch.tensor([[1.0, 2.0]], requires_grad=True)
target = torch.tensor([[5.0]])

# Forward pass.
output = model(inputs)
# Squared error reduced to a scalar: backward() without an explicit gradient
# argument only accepts single-element tensors, so .mean() keeps this cell
# valid if more rows are added to `inputs` (for one row the value is unchanged).
loss = ((output - target) ** 2).mean()

# Backward pass: fills .grad on every leaf tensor that requires gradients.
loss.backward()

print("输入x₁的偏导数:", inputs.grad[0, 0])  # ∂loss/∂x₁
print("输入x₂的偏导数:", inputs.grad[0, 1])  # ∂loss/∂x₂
print("权重w₁的偏导数:", model.weight.grad[0, 0])  # ∂loss/∂w₁
print("权重w₂的偏导数:", model.weight.grad[0, 1])  # ∂loss/∂w₂

输入x₁的偏导数: tensor(-1.8659)
输入x₂的偏导数: tensor(6.8178)
权重w₁的偏导数: tensor(-11.5105)
权重w₂的偏导数: tensor(-23.0211)


In [40]:
# Partial derivatives of a composite function, obtained through autograd.
x, y = (torch.tensor(value, requires_grad=True) for value in (2.0, 3.0))

# Composite function z = (x + y)², built via the intermediate variable u.
u = x + y
z = u.pow(2)

# Backpropagate from z; this populates x.grad and y.grad.
z.backward()

# Chain rule: ∂z/∂x = ∂z/∂u * ∂u/∂x = 2u * 1 = 2 * (2 + 3) * 1 = 10,
# and by symmetry ∂z/∂y = 10 as well.
print(f"∂z/∂x = {x.grad}")
print(f"∂z/∂y = {y.grad}")

∂z/∂x = 10.0
∂z/∂y = 10.0


In [None]:
# Parameter optimisation driven by autograd-computed partial derivatives.
def gradient_descent_example(learning_rate=0.01, num_epochs=200, log_every=20):
    """Fit ``y = w * x + b`` to three points of ``y = 2x`` by gradient descent.

    The parameters start at ``w = 1.0`` and ``b = 0.5`` and are updated after
    every individual sample (one update per data point, not per batch), using
    the gradients of the squared error computed by autograd.

    Args:
        learning_rate: Step size applied to each gradient update.
        num_epochs: Number of full passes over the three data points.
        log_every: Print a progress line whenever ``epoch % log_every == 0``.
            Must be at least 1.

    Returns:
        None. Progress is reported on stdout only.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        # Guard the modulo below; 0 would raise an unhelpful ZeroDivisionError.
        raise ValueError("log_every must be a positive integer")

    # Trainable parameters (leaf tensors tracked by autograd).
    w = torch.tensor(1.0, requires_grad=True)
    b = torch.tensor(0.5, requires_grad=True)

    # Training data; the true relationship is y = 2x.
    x_data = torch.tensor([1.0, 2.0, 3.0])
    y_data = torch.tensor([2.0, 4.0, 6.0])

    for epoch in range(num_epochs):
        # Sum of the per-sample losses, each measured before that sample's update.
        total_loss = 0.0
        for x, y_true in zip(x_data, y_data):
            # Prediction and squared-error loss for this sample.
            y_pred = w * x + b
            loss = (y_pred - y_true) ** 2
            total_loss += loss.item()

            # Compute ∂loss/∂w and ∂loss/∂b.
            loss.backward()

            # Gradient-descent step. no_grad() keeps the in-place update
            # itself out of the autograd graph.
            with torch.no_grad():
                w -= learning_rate * w.grad
                b -= learning_rate * b.grad

            # backward() accumulates into .grad, so reset before the next sample.
            w.grad.zero_()
            b.grad.zero_()

        if epoch % log_every == 0:
            print(f"Epoch {epoch}: w={w.item():.3f}, b={b.item():.3f}, loss={total_loss/len(x_data):.3f}")

# Run the demo: it prints w, b and the mean per-sample loss every 20 epochs.
gradient_descent_example()

tensor(1.) tensor(2.) tensor(1.5000, grad_fn=<AddBackward0>)
tensor(0.2500, grad_fn=<PowBackward0>)
tensor(2.) tensor(4.) tensor(2.5300, grad_fn=<AddBackward0>)
tensor(2.1609, grad_fn=<PowBackward0>)
tensor(3.) tensor(6.) tensor(3.7458, grad_fn=<AddBackward0>)
tensor(5.0814, grad_fn=<PowBackward0>)
Epoch 0: w=1.204, b=0.584, loss=2.497
tensor(1.) tensor(2.) tensor(1.7885, grad_fn=<AddBackward0>)
tensor(0.0447, grad_fn=<PowBackward0>)
tensor(2.) tensor(4.) tensor(3.0053, grad_fn=<AddBackward0>)
tensor(0.9895, grad_fn=<PowBackward0>)
tensor(3.) tensor(6.) tensor(4.3528, grad_fn=<AddBackward0>)
tensor(2.7132, grad_fn=<PowBackward0>)
tensor(1.) tensor(2.) tensor(1.9885, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<PowBackward0>)
tensor(2.) tensor(4.) tensor(3.3360, grad_fn=<AddBackward0>)
tensor(0.4408, grad_fn=<PowBackward0>)
tensor(3.) tensor(6.) tensor(4.7761, grad_fn=<AddBackward0>)
tensor(1.4979, grad_fn=<PowBackward0>)
tensor(1.) tensor(2.) tensor(2.1267, grad_fn=<AddBackward0>)
t