In [1]:
import torch


In [2]:
import torch
import torch.nn as nn

class SimpleModel(nn.Module):
    """Two linear layers with a batch-norm layer applied between them.

    Submodules are registered in the order fc1, fc2, bn; that registration
    order (not the call order in ``forward``) is the order in which their
    entries are listed by ``named_parameters()`` and ``state_dict()``.
    """

    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 1)
        self.bn = nn.BatchNorm1d(5)

    def forward(self, x):
        """Apply fc1, then batch norm, then fc2, and return the result."""
        hidden = self.fc1(x)
        normalized = self.bn(hidden)
        return self.fc2(normalized)

model = SimpleModel()

# named_parameters(): one (name, tensor) pair per learnable parameter
for name, param in model.named_parameters():
    print(f"Parameter: {name}, Shape: {param.shape}")

# state_dict(): walk the keys and look each entry up; besides the parameters
# this also lists the batch-norm buffers (running_mean, running_var,
# num_batches_tracked)
state = model.state_dict()
for key in state:
    value = state[key]
    print(f"Key: {key}, Shape: {value.shape}")

Parameter: fc1.weight, Shape: torch.Size([5, 10])
Parameter: fc1.bias, Shape: torch.Size([5])
Parameter: fc2.weight, Shape: torch.Size([1, 5])
Parameter: fc2.bias, Shape: torch.Size([1])
Parameter: bn.weight, Shape: torch.Size([5])
Parameter: bn.bias, Shape: torch.Size([5])
Key: fc1.weight, Shape: torch.Size([5, 10])
Key: fc1.bias, Shape: torch.Size([5])
Key: fc2.weight, Shape: torch.Size([1, 5])
Key: fc2.bias, Shape: torch.Size([1])
Key: bn.weight, Shape: torch.Size([5])
Key: bn.bias, Shape: torch.Size([5])
Key: bn.running_mean, Shape: torch.Size([5])
Key: bn.running_var, Shape: torch.Size([5])
Key: bn.num_batches_tracked, Shape: torch.Size([])


In [18]:
# Display the first tensor yielded by model.parameters()
tuple(model.parameters())[0]

In [20]:
# Iterating the state dict yields its keys, in order
list(model.state_dict())

['fc1.weight',
 'fc1.bias',
 'fc2.weight',
 'fc2.bias',
 'bn.weight',
 'bn.bias',
 'bn.running_mean',
 'bn.running_var',
 'bn.num_batches_tracked']

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple model
class SimpleModel(nn.Module):
    """Small feed-forward network: Linear(10, 5) -> ReLU -> Linear(5, 1)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 1)

    def forward(self, x):
        """Return fc2 applied to the ReLU of fc1(x)."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Seed the global RNG first so that the random weight initialisation and the
# random batch below come out the same on every Restart & Run All.
torch.manual_seed(0)

# Initialize the model
model = SimpleModel()

# Create the SGD optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Simulate one training iteration: a random batch of 32 samples with
# 10 features each, and one random target value per sample
input_data = torch.randn(32, 10)
target = torch.randn(32, 1)
criterion = nn.MSELoss()

def train_step():
    """Run one optimization step on the module-level batch.

    Reads the notebook globals ``model``, ``optimizer``, ``criterion``,
    ``input_data`` and ``target``.

    Returns:
        The loss computed for this step, converted to a Python number
        with ``.item()``.
    """
    # Reset gradients before the backward pass of this step.
    optimizer.zero_grad()
    batch_loss = criterion(model(input_data), target)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()

# Run a single training step
loss = train_step()

print("\nOptimizer's state_dict:")
print(optimizer.state_dict())

print("\nParameter groups:")
for i, group in enumerate(optimizer.param_groups):
    print(f"Group {i}:")
    for key, value in group.items():
        # For the 'params' entry print only how many items it holds,
        # not the items themselves; every other entry is printed as-is.
        if key == 'params':
            print(f"  params: {len(value)} tensors")
        else:
            print(f"  {key}: {value}")



Optimizer's state_dict:
{'state': {0: {'momentum_buffer': tensor([[ 0.0313,  0.0339,  0.0874, -0.0087,  0.0137,  0.1344,  0.0038, -0.0970,
          0.0033, -0.0436],
        [ 0.0576, -0.0214, -0.0337, -0.0951,  0.1063, -0.0294,  0.0698, -0.0051,
          0.0641,  0.0501],
        [-0.0129,  0.0419, -0.0064,  0.0444, -0.0317,  0.0300, -0.0335,  0.0141,
         -0.0314, -0.0156],
        [-0.0365, -0.0575,  0.0319,  0.0683, -0.0525,  0.0016,  0.0147, -0.0518,
          0.0190, -0.0135],
        [-0.0199,  0.0299,  0.0084,  0.0188, -0.0722, -0.0263, -0.0611,  0.0114,
         -0.0439, -0.0192]])}, 1: {'momentum_buffer': tensor([ 0.0098, -0.0143,  0.0483,  0.0414, -0.0137])}, 2: {'momentum_buffer': tensor([[ 0.0251,  0.0858, -0.0635, -0.0538,  0.0103]])}, 3: {'momentum_buffer': tensor([-0.0850])}}, 'param_groups': [{'lr': 0.01, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0