In [126]:
import torch.optim as optim
import torch

loss_value = 0.5 # 损失值
LR = 1 # 学习率
iteration = 10
max_epoch = 200
weights = torch.tensor(1, requires_grad=True, dtype=torch.float32)
target = torch.zeros(1)

optimizer = optim.SGD([weights], lr=LR, momentum=0.9)

# Reduce learning rate when a metric has stopped improving
scheduler_lr = optim.lr_scheduler.ReduceLROnPlateau(optimizer, # 调节的是优化器lr参数
                                                    factor=0.1, # factor (float) – Factor by which the learning rate will be reduced. new_lr = lr * factor. Default: 0.1.
                                                    mode='min', # mode=min通用用于损失类指标,mode=max通用用于精度类指标
                                                    patience=5,
                                                    cooldown=5,
                                                    min_lr=0, # 学习率下限
                                                    eps=1e-12, # eps (float) – Minimal decay applied to lr. If the difference between new and old lr is smaller than eps, the update is ignored. Default: 1e-8.
                                                    verbose=True) # If True, prints a message to stdout for each update. Default: False

In [127]:
'''
mode (str) –
    One of min, max. In min mode, lr will be reduced when the quantity monitored has stopped decreasing;
    in max mode it will be reduced when the quantity monitored has stopped increasing. Default: ‘min’.

patience (int) –
    Number of epochs with no improvement after which learning rate will be reduced.
    For example, if patience = 2, then we will ignore the first 2 epochs with no improvement,
    and will only decrease the LR after the 3rd epoch if the loss still hasn’t improved then. Default: 10.

cooldown (int) –
    Number of epochs to wait before resuming normal operation after lr has been reduced. Default: 0.

min_lr (float or list) –
    A scalar or a list of scalars.
    A lower bound on the learning rate of all param groups or each group respectively. Default: 0.
'''

for epoch in range(max_epoch):
    for i in range(iteration):
        optimizer.step()
        optimizer.zero_grad()

    if epoch==2:
       loss_value=0.1

    # 输出Epoch从1开始
    scheduler_lr.step(loss_value) # 传入监控指标

Epoch     9: reducing learning rate of group 0 to 1.0000e-01.
Epoch    20: reducing learning rate of group 0 to 1.0000e-02.
Epoch    31: reducing learning rate of group 0 to 1.0000e-03.
Epoch    42: reducing learning rate of group 0 to 1.0000e-04.
Epoch    53: reducing learning rate of group 0 to 1.0000e-05.
Epoch    64: reducing learning rate of group 0 to 1.0000e-06.
Epoch    75: reducing learning rate of group 0 to 1.0000e-07.
Epoch    86: reducing learning rate of group 0 to 1.0000e-08.
Epoch    97: reducing learning rate of group 0 to 1.0000e-09.
Epoch   108: reducing learning rate of group 0 to 1.0000e-10.
Epoch   119: reducing learning rate of group 0 to 1.0000e-11.
Epoch   130: reducing learning rate of group 0 to 1.0000e-12.


In [128]:
# 运行结果解析:
# 第1轮loss=0.5
# 第3轮时loss=0.1,loss下降
# 第4, 5, 6, 7, 8轮时loss=0.1,loss不变
# 由于patience=5,故第9轮时学习率下降,即学习率=学习率*factor
# 第10, 11, 12, 13, 14轮loss仍然不变
# 第15, 16, 17, 18, 19轮loos仍然不变
# 由于patience=5,cooldown=5,故第20轮时学习率下降
# ......
# ......
# ......
# 若学习率继续下降,第Epoch130轮学习率与下一次学习率差值将小于eps,故此后学习率将不再下降