forked from gonum/optimize
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stepsizers.go
185 lines (158 loc) · 6.21 KB
/
stepsizers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"github.com/gonum/floats"
)
const (
initialStepFactor = 1
quadraticMinimumStepSize = 1e-3
quadraticMaximumStepSize = 1
quadraticThreshold = 1e-12
firstOrderMinimumStepSize = quadraticMinimumStepSize
firstOrderMaximumStepSize = quadraticMaximumStepSize
)
// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
Size float64
}
func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
return c.Size
}
func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
return c.Size
}
// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_k⋅p_k.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
// Threshold determines that the initial step size should be estimated by
// quadratic interpolation when the relative change in the objective
// function is larger than Threshold. Otherwise the initial step size is
// set to 2*previous step size.
// If Threshold is zero, it will be set to 1e-12.
Threshold float64
// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
// If InitialStepFactor is zero, it will be set to one.
InitialStepFactor float64
// MinStepSize is the lower bound on the estimated step size.
// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
// If MinStepSize is zero, it will be set to 1e-3.
MinStepSize float64
// MaxStepSize is the upper bound on the estimated step size.
// If MaxStepSize is zero, it will be set to 1.
MaxStepSize float64
fPrev float64
dirPrevNorm float64
projGradPrev float64
xPrev []float64
}
func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
if q.Threshold == 0 {
q.Threshold = quadraticThreshold
}
if q.InitialStepFactor == 0 {
q.InitialStepFactor = initialStepFactor
}
if q.MinStepSize == 0 {
q.MinStepSize = quadraticMinimumStepSize
}
if q.MaxStepSize == 0 {
q.MaxStepSize = quadraticMaximumStepSize
}
if q.MaxStepSize <= q.MinStepSize {
panic("optimize: MinStepSize not smaller than MaxStepSize")
}
gNorm := floats.Norm(loc.Gradient, math.Inf(1))
stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))
q.fPrev = loc.F
q.dirPrevNorm = floats.Norm(dir, 2)
q.projGradPrev = floats.Dot(loc.Gradient, dir)
q.xPrev = resize(q.xPrev, len(loc.X))
copy(q.xPrev, loc.X)
return stepSize
}
func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
projGrad := floats.Dot(loc.Gradient, dir)
stepSize = 2 * stepSizePrev
if !floats.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
// Two consecutive function values are not relatively equal, so
// computing the minimum of a quadratic interpolant might make sense
df := (loc.F - q.fPrev) / stepSizePrev
quadTest := df - q.projGradPrev
if quadTest > 0 {
// There is a chance of approximating the function well by a
// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
// is larger than ∇f_{k-1}⋅p_{k-1}
// Set the step size to the minimizer of the quadratic function that
// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k
stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
}
}
// Bound the step size to lie in [MinStepSize, MaxStepSize]
stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))
q.fPrev = loc.F
q.dirPrevNorm = floats.Norm(dir, 2)
q.projGradPrev = projGrad
copy(q.xPrev, loc.X)
return stepSize
}
// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
// s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
// If InitialStepFactor is zero, it will be set to one.
InitialStepFactor float64
// MinStepSize is the lower bound on the estimated step size.
// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
// If MinStepSize is zero, it will be set to 1e-3.
MinStepSize float64
// MaxStepSize is the upper bound on the estimated step size.
// If MaxStepSize is zero, it will be set to 1.
MaxStepSize float64
dirPrevNorm float64
projGradPrev float64
xPrev []float64
}
func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
if fo.InitialStepFactor == 0 {
fo.InitialStepFactor = initialStepFactor
}
if fo.MinStepSize == 0 {
fo.MinStepSize = firstOrderMinimumStepSize
}
if fo.MaxStepSize == 0 {
fo.MaxStepSize = firstOrderMaximumStepSize
}
if fo.MaxStepSize <= fo.MinStepSize {
panic("optimize: MinStepSize not smaller than MaxStepSize")
}
gNorm := floats.Norm(loc.Gradient, math.Inf(1))
stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))
fo.dirPrevNorm = floats.Norm(dir, 2)
fo.projGradPrev = floats.Dot(loc.Gradient, dir)
fo.xPrev = resize(fo.xPrev, len(loc.X))
copy(fo.xPrev, loc.X)
return stepSize
}
func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
projGrad := floats.Dot(loc.Gradient, dir)
stepSize = stepSizePrev * fo.projGradPrev / projGrad
stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))
fo.dirPrevNorm = floats.Norm(dir, 2)
fo.projGradPrev = floats.Dot(loc.Gradient, dir)
copy(fo.xPrev, loc.X)
return stepSize
}