In [1]:
import torch.optim as optim
import torch

In [2]:
# A 2x2 float32 parameter that tracks gradients.
weight = torch.tensor([[10, 11],
                       [12, 13]], dtype=torch.float32, requires_grad=True)
# Assign a gradient of ones by hand so step() has something to apply
# without running a backward pass.
weight.grad = torch.ones((2, 2))

optimizer = optim.SGD([weight],  # iterable of parameters
                      lr=0.1)

# detach() yields a gradient-free view for display; it is the recommended
# replacement for .data, which bypasses autograd's tracking.
print("weight before step:{}".format(weight.detach()))
optimizer.step()  # perform one gradient update: weight <- weight - lr * grad
print("weight after step:{}".format(weight.detach()))

weight before step:tensor([[10., 11.],
        [12., 13.]])
weight after step:tensor([[ 9.9000, 10.9000],
        [11.9000, 12.9000]])


In [3]:
# Show the optimizer's parameter groups before a new one is registered.
groups_report = "optimizer.param_groups is\n{}"
print(groups_report.format(optimizer.param_groups), end='\n\n')

# Second trainable 2x2 tensor, again with a hand-assigned gradient of ones.
weight1 = torch.tensor([[100, 200], [300, 400]],
                       dtype=torch.float32, requires_grad=True)
weight1.grad = torch.ones(2, 2)

# Add a param group to the optimizer's param_groups; this group carries
# its own learning rate.
optimizer.add_param_group(dict(params=weight1, lr=0.0001))

print(groups_report.format(optimizer.param_groups))

optimizer.param_groups is
[{'params': [tensor([[ 9.9000, 10.9000],
        [11.9000, 12.9000]], requires_grad=True)], 'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}]

optimizer.param_groups is
[{'params': [tensor([[ 9.9000, 10.9000],
        [11.9000, 12.9000]], requires_grad=True)], 'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}, {'params': [tensor([[100., 200.],
        [300., 400.]], requires_grad=True)], 'lr': 0.0001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}]


In [4]:
# Create several parameter groups: each dict inside the list is one group.
# Keyword arguments given outside the list apply to every group, but a value
# set inside a group's own dict takes precedence over them.
optimizer1 = optim.SGD(
    [
        {"params": weight, "momentum": 0.7, "lr": 0.00001},
        {"params": weight1, "momentum": 0.5},
    ],
    lr=0.01,  # fallback learning rate for groups that do not set their own
)
optimizer1.param_groups

[{'params': [tensor([[ 9.9000, 10.9000],
           [11.9000, 12.9000]], requires_grad=True)],
  'momentum': 0.7,
  'lr': 1e-05,
  'dampening': 0,
  'weight_decay': 0,
  'nesterov': False},
 {'params': [tensor([[100., 200.],
           [300., 400.]], requires_grad=True)],
  'momentum': 0.5,
  'lr': 0.01,
  'dampening': 0,
  'weight_decay': 0,
  'nesterov': False}]

In [5]:
# The optimizer holds references to the very same tensor objects rather than
# copies: the ids of its managed parameters match those of weight / weight1.
print("weight in optimizer id is:{}\nweight id is:{}\n".
      format(id(optimizer.param_groups[0]['params'][0]),
             id(weight)))
print("weight1 in optimizer id is:{}\nweight1 id is:{}\n".
      format(id(optimizer.param_groups[1]['params'][0]),
             id(weight1)))

print("weight.grad is {}".format(weight.grad))
print("weight1.grad is {}".format(weight1.grad), end='\n\n')

# Zero the gradients of every parameter the optimizer manages.
# set_to_none=False keeps the .grad tensors and fills them with zeros.
# PyTorch 2.0+ defaults to set_to_none=True, which would replace the grads
# with None and make the prints below show "None" instead of zero tensors.
optimizer.zero_grad(set_to_none=False)

print("after optimizer.zero_grad(), weight.grad is\n{}".format(weight.grad))
print("after optimizer.zero_grad(), weight1.grad is\n{}".format(weight1.grad))

weight in optimizer id is:2071899966184
weight id is:2071899966184

weight1 in optimizer id is:2071899978472
weight1 id is:2071899978472

weight.grad is tensor([[1., 1.],
        [1., 1.]])
weight1.grad is tensor([[1., 1.],
        [1., 1.]])

after optimizer.zero_grad(), weight.grad is
tensor([[0., 0.],
        [0., 0.]])
after optimizer.zero_grad(), weight1.grad is
tensor([[0., 0.],
        [0., 0.]])
