In [0]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Three independent 2x2 leaf tensors that track gradients, drawn in the
# order w1, w2, w3.
w1, w2, w3 = (torch.randn(2, 2, requires_grad=True) for _ in range(3))

# A single parameter group: a flat list of tensors shares one set of
# hyper-parameters (here lr=0.1 plus the SGD defaults).
shared_params = [w1, w3]
optimizer_1 = optim.SGD(shared_params, lr=0.1)
print('len(optimizer.param_groups): ', len(optimizer_1.param_groups))
print(optimizer_1.param_groups, '\n')

# Two parameter groups: each dict is its own group. The first group takes the
# optimizer-level lr=0.1, the second overrides it with lr=0.001.
group_specs = [
    {'params': w1},
    {'params': w2, 'lr': 0.001},
]
optimizer_2 = optim.SGD(group_specs, lr=0.1)
print('len(optimizer.param_groups): ', len(optimizer_2.param_groups))
print(optimizer_2.param_groups)

len(optimizer.param_groups):  1
[{'params': [tensor([[-1.1039,  0.1478],
        [-0.7350,  0.6612]], requires_grad=True), tensor([[-0.0999,  0.0065],
        [ 0.2860, -0.5484]], requires_grad=True)], 'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}] 

len(optimizer.param_groups):  2
[{'params': [tensor([[-1.1039,  0.1478],
        [-0.7350,  0.6612]], requires_grad=True)], 'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}, {'params': [tensor([[ 0.9080, -0.8723],
        [ 0.1146, -0.0018]], requires_grad=True)], 'lr': 0.001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}]


In [4]:
# Two 2x2 leaf tensors managed by a single SGD parameter group.
w1 = torch.randn(2, 2, requires_grad=True)
w2 = torch.randn(2, 2, requires_grad=True)

optimizer = optim.SGD([w1, w2], lr=0.001, momentum=0.9)

# No backward pass is run in this cell, so a gradient is assigned by hand.
# w1 is reached through the optimizer: first param group, first parameter.
optimizer.param_groups[0]['params'][0].grad = torch.randn(2, 2)

print('参数w1的梯度：')
print(optimizer.param_groups[0]['params'][0].grad, '\n')  # gradient of w1 (group 0, parameter 0)

# Ask explicitly for in-place zeroing. The default of `set_to_none` differs
# between PyTorch releases (recent ones default to True, which replaces .grad
# with None); passing False keeps the demonstrated result -- a tensor of
# zeros -- independent of that default. Requires a PyTorch version whose
# zero_grad() accepts `set_to_none`.
optimizer.zero_grad(set_to_none=False)
print('执行zero_grad()之后，参数w1的梯度：')
print(optimizer.param_groups[0]['params'][0].grad)  # gradient of w1 after zero_grad()


参数w1的梯度：
tensor([[-1.1560,  0.8170],
        [ 0.2633,  0.2294]]) 

执行zero_grad()之后，参数w1的梯度：
tensor([[0., 0.],
        [0., 0.]])


In [8]:
# ----------------------------------- state_dict
class Net(nn.Module):
    """Tiny conv -> pool -> linear network used to illustrate ``state_dict``.

    Layers:
        conv1: 3 input channels -> 1 output channel, 3x3 kernel.
        pool:  2x2 max pooling with stride 2 (no learnable parameters).
        fc1:   9 input features -> 2 output features.

    The forward pass reshapes the pooled activations to rows of
    1 * 3 * 3 = 9 values, so each sample must come out of the pooling layer
    as a 1x3x3 map (for example, 3x8x8 inputs).
    """

    def __init__(self):
        super().__init__()
        # Creation order (conv1, pool, fc1) is kept so that seeded parameter
        # initialisation and state_dict key order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=1 * 3 * 3, out_features=2)

    def forward(self, x):
        """Return the ReLU-activated output of ``fc1`` for input batch ``x``."""
        activated = F.relu(self.conv1(x))
        pooled = self.pool(activated)
        # Flatten to (-1, 9); 9 is fc1's input width.
        flattened = pooled.view(-1, self.fc1.in_features)
        return F.relu(self.fc1(flattened))


net = Net()

# Fetch the network's current parameters as a name -> tensor mapping.
net_state_dict = net.state_dict()

print('net_state_dict类型：', type(net_state_dict))
print('net_state_dict管理的参数: ', net_state_dict.keys())
# Walk the entries in their stored order and report each tensor's size.
for key in net_state_dict:
    value = net_state_dict[key]
    print('参数名: ', key, '\t大小: ',  value.size())

net_state_dict类型： <class 'collections.OrderedDict'>
net_state_dict管理的参数:  odict_keys(['conv1.weight', 'conv1.bias', 'fc1.weight', 'fc1.bias'])
参数名:  conv1.weight 	大小:  torch.Size([1, 3, 3, 3])
参数名:  conv1.bias 	大小:  torch.Size([1])
参数名:  fc1.weight 	大小:  torch.Size([2, 9])
参数名:  fc1.bias 	大小:  torch.Size([2])


In [9]:
# Three 2x2 leaf tensors with gradient tracking enabled at creation.
w1 = torch.randn(2, 2, requires_grad=True)
w2 = torch.randn(2, 2, requires_grad=True)
w3 = torch.randn(2, 2, requires_grad=True)

# Start with a single parameter group holding w1 and w2.
initial_params = [w1, w2]
optimizer_1 = optim.SGD(initial_params, lr=0.1)
print('当前参数组个数: ', len(optimizer_1.param_groups))
print(optimizer_1.param_groups, '\n')

# Append a second parameter group for w3 with its own lr and momentum.
print('增加一组参数 w3\n')
extra_group = {'params': w3, 'lr': 0.001, 'momentum': 0.8}
optimizer_1.add_param_group(extra_group)

print('当前参数组个数: ', len(optimizer_1.param_groups))
print(optimizer_1.param_groups, '\n')

当前参数组个数:  1
[{'params': [tensor([[-0.3283,  1.2743],
        [ 0.1456,  0.5133]], requires_grad=True), tensor([[-0.1404,  0.0085],
        [ 1.0374,  0.7065]], requires_grad=True)], 'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}] 

增加一组参数 w3

当前参数组个数:  2
[{'params': [tensor([[-0.3283,  1.2743],
        [ 0.1456,  0.5133]], requires_grad=True), tensor([[-0.1404,  0.0085],
        [ 1.0374,  0.7065]], requires_grad=True)], 'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}, {'params': [tensor([[-0.3938, -0.1759],
        [-0.6844, -0.5078]], requires_grad=True)], 'lr': 0.001, 'momentum': 0.8, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}] 

