-
Notifications
You must be signed in to change notification settings - Fork 581
/
lr_scheduler.py
179 lines (153 loc) · 6.82 KB
/
lr_scheduler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""Popular Learning Rate Schedulers"""
from __future__ import division
import math
import torch
from bisect import bisect_right
__all__ = ['LRScheduler', 'WarmupMultiStepLR', 'WarmupPolyLR']
class LRScheduler(object):
    r"""Learning Rate Scheduler

    Computes a learning rate for a given iteration index and writes it into
    an optimizer's parameter groups.

    Parameters
    ----------
    mode : str
        Modes for learning rate scheduler.
        Currently it supports 'constant', 'step', 'linear', 'poly' and 'cosine'.
    base_lr : float
        Base learning rate, i.e. the starting learning rate.
    target_lr : float
        Target learning rate, i.e. the ending learning rate.
        With constant mode target_lr is ignored.
    niters : int
        Number of iterations to be scheduled.
    nepochs : int
        Number of epochs to be scheduled.
        When ``nepochs * iters_per_epoch`` is positive it overrides ``niters``.
    iters_per_epoch : int
        Number of iterations in each epoch.
    offset : int
        Number of iterations before this scheduler.
    power : float
        Power parameter of poly scheduler.
    step_iter : list
        A list of iterations to decay the learning rate.
    step_epoch : list
        A list of epochs to decay the learning rate.
        Only used when ``nepochs * iters_per_epoch`` is positive.
    step_factor : float
        Learning rate decay factor.
    warmup_epochs : int
        Number of warm-up epochs; converted to iterations with
        ``iters_per_epoch``. During warm-up the schedule factor is scaled
        linearly by ``T / warmup_iters``.
    """

    def __init__(self, mode, base_lr=0.01, target_lr=0, niters=0, nepochs=0, iters_per_epoch=0,
                 offset=0, power=0.9, step_iter=None, step_epoch=None, step_factor=0.1, warmup_epochs=0):
        super(LRScheduler, self).__init__()
        assert (mode in ['constant', 'step', 'linear', 'poly', 'cosine'])
        if mode == 'step':
            assert (step_iter is not None or step_epoch is not None)

        self.niters = niters
        self.step = step_iter
        # An epoch-based specification, when given, takes precedence over
        # the iteration-based one.
        epoch_iters = nepochs * iters_per_epoch
        if epoch_iters > 0:
            self.niters = epoch_iters
            if step_epoch is not None:
                self.step = [s * iters_per_epoch for s in step_epoch]

        self.step_factor = step_factor
        self.base_lr = base_lr
        self.target_lr = base_lr if mode == 'constant' else target_lr
        self.offset = offset
        self.power = power
        self.warmup_iters = warmup_epochs * iters_per_epoch
        self.mode = mode

    def __call__(self, optimizer, num_update):
        """Compute the learning rate for ``num_update`` and apply it to ``optimizer``."""
        self.update(num_update)
        assert self.learning_rate >= 0
        self._adjust_learning_rate(optimizer, self.learning_rate)

    def update(self, num_update):
        """Set ``self.learning_rate`` for the iteration index ``num_update``."""
        last = self.niters - 1
        # Position inside the schedule, clamped to [0, last].
        cur = min(max(0, num_update - self.offset), last)
        # Numerator / denominator of the completed fraction. A schedule with
        # at most one iteration (last <= 0) is treated as finished; this
        # avoids a division by zero when niters == 1 and matches the value
        # niters == 0 already produced.
        done, span = (cur, last) if last > 0 else (1, 1)

        if self.mode == 'constant':
            factor = 0
        elif self.mode == 'linear':
            factor = 1 - done / span
        elif self.mode == 'poly':
            factor = pow(1 - done / span, self.power)
        elif self.mode == 'cosine':
            factor = (1 + math.cos(math.pi * done / span)) / 2
        elif self.mode == 'step':
            if self.step is not None:
                # One decay per milestone that has already been reached.
                count = sum(1 for s in self.step if s <= cur)
                factor = pow(self.step_factor, count)
            else:
                factor = 1
        else:
            raise NotImplementedError

        # warm up lr schedule
        if self.warmup_iters > 0 and cur < self.warmup_iters:
            factor = factor * 1.0 * cur / self.warmup_iters

        if self.mode == 'step':
            self.learning_rate = self.base_lr * factor
        else:
            # Interpolate between target_lr (factor 0) and base_lr (factor 1).
            self.learning_rate = self.target_lr + (self.base_lr - self.target_lr) * factor

    def _adjust_learning_rate(self, optimizer, lr):
        """Write ``lr`` to the first parameter group and ``lr * 10`` to the others."""
        optimizer.param_groups[0]['lr'] = lr
        # enlarge the lr at the head
        for group in optimizer.param_groups[1:]:
            group['lr'] = lr * 10
# FIXME: ideally this would be achieved by composing two schedulers,
# keeping MultiStepLR separate from the warm-up logic,
# but the current LRScheduler design doesn't allow it
# reference: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/solver/lr_scheduler.py
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """Multi-step learning rate decay preceded by a warm-up phase.

    Parameters
    ----------
    optimizer : torch.optim.Optimizer
        Wrapped optimizer.
    milestones : list
        Increasing iteration indices at which the learning rate is
        multiplied by ``gamma``.
    gamma : float
        Multiplicative decay factor applied at every milestone.
    warmup_factor : float
        Learning rate multiplier at the start of warm-up.
    warmup_iters : int
        Number of warm-up iterations.
    warmup_method : str
        Either 'constant' (use ``warmup_factor`` throughout warm-up) or
        'linear' (ramp from ``warmup_factor`` to 1).
    last_epoch : int
        Index of the last iteration, forwarded to the base scheduler.

    Raises
    ------
    ValueError
        If ``milestones`` is not sorted in increasing order or
        ``warmup_method`` is not one of the accepted values.
    """

    def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3,
                 warmup_iters=500, warmup_method="linear", last_epoch=-1):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones))
        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted got {}".format(warmup_method))

        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        # The base constructor performs an initial step that calls get_lr(),
        # so every attribute get_lr() reads must be set before this call.
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group for ``self.last_epoch``."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == 'constant':
                warmup_factor = self.warmup_factor
            elif self.warmup_method == 'linear':
                # Ramp linearly from warmup_factor (alpha = 0) towards 1.
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # Number of milestones already passed determines the decay exponent.
        decay = self.gamma ** bisect_right(self.milestones, self.last_epoch)
        return [base_lr * warmup_factor * decay for base_lr in self.base_lrs]
class WarmupPolyLR(torch.optim.lr_scheduler._LRScheduler):
    """Polynomial learning rate decay preceded by a warm-up phase.

    Parameters
    ----------
    optimizer : torch.optim.Optimizer
        Wrapped optimizer.
    target_lr : float
        Learning rate reached at the end of the decay.
    max_iters : int
        Total number of iterations, warm-up included.
    power : float
        Exponent of the polynomial decay.
    warmup_factor : float
        Interpolation factor between ``target_lr`` and the base learning
        rate at the start of warm-up.
    warmup_iters : int
        Number of warm-up iterations.
    warmup_method : str
        Either 'constant' or 'linear'.
    last_epoch : int
        Index of the last iteration, forwarded to the base scheduler.

    Raises
    ------
    ValueError
        If ``warmup_method`` is not one of the accepted values.
    """

    def __init__(self, optimizer, target_lr=0, max_iters=0, power=0.9, warmup_factor=1.0 / 3,
                 warmup_iters=500, warmup_method='linear', last_epoch=-1):
        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted "
                "got {}".format(warmup_method))

        self.target_lr = target_lr
        self.max_iters = max_iters
        self.power = power
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method

        # Attributes are assigned first because the base constructor
        # performs an initial step that calls get_lr().
        super(WarmupPolyLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group for ``self.last_epoch``."""
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == 'constant':
                warmup_factor = self.warmup_factor
            elif self.warmup_method == 'linear':
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
            else:
                raise ValueError("Unknown warmup type.")
            return [self.target_lr + (base_lr - self.target_lr) * warmup_factor for base_lr in self.base_lrs]

        N = self.max_iters - self.warmup_iters
        T = self.last_epoch - self.warmup_iters
        if N > 0:
            # Clamp the position so that stepping past max_iters keeps the
            # rate at target_lr instead of feeding a negative base to pow()
            # (which yields a complex number for a fractional power).
            remaining = 1 - min(max(0, T), N) / N
        else:
            # No decay iterations are left after warm-up: the schedule is
            # already finished (also avoids dividing by zero).
            remaining = 0
        factor = pow(remaining, self.power)
        return [self.target_lr + (base_lr - self.target_lr) * factor for base_lr in self.base_lrs]
if __name__ == '__main__':
    # Minimal smoke test: build a scheduler around a throw-away model.
    import torch
    import torch.nn as nn

    model = nn.Conv2d(16, 16, 3, 1, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # WarmupPolyLR takes the schedule length as `max_iters`; it has no
    # `niters` parameter, so passing one raises a TypeError.
    lr_scheduler = WarmupPolyLR(optimizer, max_iters=1000)