In [17]:
import torch
import torch.nn as nn
import torch.optim as optim

# A plain feed-forward network with no residual (skip) connections.
class SimpleNN(nn.Module):
    """Three-layer fully connected network: 10 -> 50 -> 50 -> 1.

    ReLU is applied after the first two linear layers; the final layer's
    output is returned without an activation.
    """

    def __init__(self):
        super().__init__()
        # Creation order is layer1, layer2, layer3 (this is also the order
        # in which named_parameters() reports them).
        self.layer1 = nn.Linear(10, 50)
        self.layer2 = nn.Linear(50, 50)
        self.layer3 = nn.Linear(50, 1)

    def forward(self, x):
        """Run ``x`` through the three layers and return the result.

        ``x`` must have 10 features in its last dimension, as required by
        ``layer1``.
        """
        hidden = self.layer1(x).relu()
        hidden = self.layer2(hidden).relu()
        return self.layer3(hidden)

# Build the model first, then the optimizer and the loss function.
model_no_resnet = SimpleNN()
optimizer = optim.SGD(model_no_resnet.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Draw one random sample: a 10-feature input and a single target value.
x = torch.randn(10)
y_true = torch.randn(1)
print(f'y_true: {y_true}')

# Clear any existing gradients, then run forward pass -> loss -> backward pass.
optimizer.zero_grad()
output = model_no_resnet(x)
loss = criterion(output, y_true)
loss.backward()

# Print the gradient norm of every parameter tensor, layer by layer.
for name, param in model_no_resnet.named_parameters():
    print(f"{name} gradient: {param.grad.norm()}")


y_true: tensor([-0.0516])
layer1.weight gradient: 0.10826874524354935
layer1.bias gradient: 0.04579398036003113
layer2.weight gradient: 0.29824385046958923
layer2.bias gradient: 0.11192195862531662
layer3.weight gradient: 0.3742479681968689
layer3.bias gradient: 0.30134689807891846


In [26]:
import torch
import torch.nn as nn
import torch.optim as optim

# A feed-forward network with residual (skip) connections.
class ResNetNN(nn.Module):
    """Three-layer fully connected network (10 -> 50 -> 50 -> 1) with residual additions.

    After each of the first two layers the layer's ReLU output is added to a
    residual term. When the residual's feature width already matches the
    layer output it is added unchanged; otherwise the residual is first
    projected through that same linear layer (without ReLU) so the shapes
    agree. The final layer has no activation and no residual.

    The feature width is read from the last dimension, so both a single
    sample of shape ``(10,)`` and a batch of shape ``(N, 10)`` are accepted.
    """

    def __init__(self):
        super(ResNetNN, self).__init__()
        self.layer1 = nn.Linear(10, 50)
        self.layer2 = nn.Linear(50, 50)
        self.layer3 = nn.Linear(50, 1)

    def forward(self, x):
        """Return the network output for ``x``.

        Args:
            x: Tensor whose last dimension has 10 features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        residual = x  # keep the input for the first residual addition

        # First layer.
        x = torch.relu(self.layer1(x))

        # Compare the feature (last) dimension rather than dim 0: for batched
        # input dim 0 is the batch size, which always matches and would skip
        # the projection, making the addition below fail on mismatched widths.
        if residual.size(-1) != x.size(-1):
            residual = self.layer1(residual)  # project 10 -> 50 features

        x = x + residual  # residual addition
        residual = x  # carry the sum forward as the next residual

        # Second layer.
        x = torch.relu(self.layer2(x))

        # Same feature-width check; widths are 50 and 50 here, so the
        # residual is normally added unchanged.
        if residual.size(-1) != x.size(-1):
            residual = self.layer2(residual)

        x = x + residual  # second residual addition

        # Output layer: no activation, no residual.
        return self.layer3(x)

# Build the model first, then the loss function and the optimizer.
model_resnet = ResNetNN()
criterion = nn.MSELoss()  # mean squared error loss
optimizer = optim.SGD(model_resnet.parameters(), lr=0.01)

# Draw one random sample and show it.
x = torch.randn(10)  # input tensor of shape (10,)
y_true = torch.randn(1)  # target tensor of shape (1,)
print(f'y_true: {y_true}')

print(f'x: {x}')

# Clear any existing gradients, then run the forward pass.
optimizer.zero_grad()
output = model_resnet(x)
print(f'output: {output}')

# Compute the loss and backpropagate it.
loss = criterion(output, y_true)
loss.backward()

# Print the gradient norm of every parameter tensor, layer by layer.
for name, param in model_resnet.named_parameters():
    print(f"{name} gradient: {param.grad.norm()}")


y_true: tensor([0.9939])
x: tensor([-1.7246,  0.5609, -0.2517, -0.5685, -2.1980,  1.3735,  0.1306, -0.1936,
        -1.2650,  0.6860])
output: tensor([0.7804], grad_fn=<ViewBackward0>)
layer1.weight gradient: 1.6925430297851562
layer1.bias gradient: 0.4783744513988495
layer2.weight gradient: 1.2980936765670776
layer2.bias gradient: 0.18881326913833618
layer3.weight gradient: 3.2244694232940674
layer3.bias gradient: 0.4270782470703125
