unconstrained.py
import numpy as np
from scipy.optimize import line_search, BFGS
from numdifftools import Gradient
class DescentAlgorithm:
def __init__(self, fun, gradient=None, hess=None, nd={}, wolfe_c1=1e-4, wolfe_c2=0.1,
x_tol=1e-6, f_tol=1e-6, max_iter=50, save_history=False):
"""
        Solver base class for gradient-based descent algorithms. Subclasses supply the search
        direction through ``prepare_initial_step`` and ``prepare_next_step``; an illustrative
        usage sketch is given at the end of this module.

Parameters
----------
fun : callable
Objective function ``f(x, *args)`` of minimization problem.
gradient : callable or None, optional
Gradient of objective function ``gradient(x, *args)``
of minimization problem. If None, numdifftools Gradient is created. Defaults to None.
hess : callable or HessianUpdateStrategy, optional
Hessian of the objective function.
            For the Newton method it should be provided as ``hess(x, *args)``, whereas for
            quasi-Newton methods it should be a scipy.optimize.HessianUpdateStrategy instance.
            Defaults to None.
        nd : dict, optional
            Keyword arguments passed to numdifftools.Gradient if gradient is not provided.
            Defaults to {}.
wolfe_c1 : float, optional
Wolfe line search c1. Defaults to 1e-4.
wolfe_c2 : float, optional
Wolfe line search c2. Defaults to 0.1.
x_tol : float, optional
Tolerance for advance in x. Defaults to 1e-6.
f_tol : float, optional
Tolerance for advance in f(x). Defaults to 1e-6.
max_iter : int, optional
Max iterations. Defaults to 50.
        save_history : bool, optional
            Whether or not to store the history of iterates. Defaults to False.
"""
self.fun = fun
if gradient is None:
self.gradient = Gradient(fun, **nd)
else:
self.gradient = gradient
self.hess = hess
self.wolfe_coefs = wolfe_c1, wolfe_c2
self.x_tol = x_tol
self.f_tol = f_tol
self.max_iter = max_iter
self.save_history = save_history
self.history = []
    def optimize(self, x0, *args, **kwargs):
        """Run the descent iterations from ``x0``; the outcome is stored in ``self.result``."""
        x0 = np.atleast_1d(x0).astype(float)
self.history = []
xk = x0.copy()
fk = self.fun(x0, *args, **kwargs)
gradk = self.gradient(x0, *args, **kwargs)
fc, gc = 1, 1
pk = self.prepare_initial_step(xk, fk, gradk, *args, **kwargs)
advance_x, advance_f = True, True
k = 0
if self.save_history:
self.history.append({"x":xk, "f":fk, "grad":gradk})
while (advance_x or advance_f) and (k <= self.max_iter):
alpha, fc_, gc_, fnew, fk, gradnew = line_search(self.fun, self.gradient,
xk, pk, gradk, fk, args=args,
c1=self.wolfe_coefs[0],
c2=self.wolfe_coefs[1],
maxiter=15)
            if alpha is None:
                # the Wolfe line search failed; fall back to a unit step along the direction
                alpha = 1
                fnew = self.fun(xk + alpha * pk, *args, **kwargs)
                gradnew = self.gradient(xk + alpha * pk, *args, **kwargs)
xnew = xk + alpha * pk
fc = fc + fc_
gc = gc + gc_
            if gradnew is None:
                # the line search did not return the new gradient; evaluate it explicitly
                gradnew = self.gradient(xnew, *args, **kwargs)
advance_f = abs(fnew - fk) > self.f_tol
advance_x = np.linalg.norm(xnew - xk) > self.x_tol
xk, fk, gradk, pk = self.prepare_next_step(xk, fk, gradk, pk, xnew, fnew, gradnew, *args, **kwargs)
k = k + 1
if self.save_history:
self.history.append({"x":xk, "f":fk, "grad":gradk})
            if np.linalg.norm(pk) < np.sqrt(np.finfo(float).eps):
                # the next search direction is negligibly small; stop and report convergence
                self.message = 'Negligible step'
                self.success = True
                break
if not (advance_x or advance_f):
self.success = True
self.message = 'Tolerance reached'
elif k > self.max_iter:
self.success = False
self.message = 'Max iterations reached'
self.x = xk
self.f = fk
self.grad = gradk
self.fc = fc
self.gc = gc
self.result = {"x":xk, "f":fk, "grad":gradk, "iter":k, "message":self.message, "success":self.success}
    def prepare_next_step(self, xk, fk, gradk, pk, xnew, fnew, gradnew, *args, **kwargs):
        """Accept the trial point and return ``(xk, fk, gradk, pk)`` for the next iteration.

        Must be overridden by subclasses.
        """
        raise NotImplementedError

    def prepare_initial_step(self, xk, fk, gradk, *args, **kwargs):
        """Return the initial search direction at the starting point.

        Must be overridden by subclasses.
        """
        raise NotImplementedError
class SteepestDescent(DescentAlgorithm):
def prepare_next_step(self, xk, fk, gradk, pk, xnew, fnew, gradnew, *args, **kwargs):
return xnew, fnew, gradnew, -gradnew
def prepare_initial_step(self, xk, fk, gradk, *args, **kwargs):
return -gradk
class ConjugateGradient(SteepestDescent):
    def prepare_next_step(self, xk, fk, gradk, pk, xnew, fnew, gradnew, *args, **kwargs):
        # nonlinear conjugate gradient with the Fletcher-Reeves update:
        # beta_k = (gradnew . gradnew) / (gradk . gradk), new direction = -gradnew + beta_k * pk
        return xnew, fnew, gradnew, -gradnew + pk * gradnew.dot(gradnew) / gradk.dot(gradk)
class Newton(DescentAlgorithm):
def __init__(self, fun, gradient=None, hess=None, nd={}, wolfe_c1=1e-4, wolfe_c2=0.9,
x_tol=1e-6, f_tol=1e-6, max_iter=50, save_history=False):
        if hess is None:
            raise TypeError("The Newton method requires a Hessian callable")
super().__init__(fun, gradient=gradient, hess=hess, nd=nd, wolfe_c1=wolfe_c1, wolfe_c2=wolfe_c2,
x_tol=x_tol, f_tol=f_tol, max_iter=max_iter, save_history=save_history)
def prepare_next_step(self, xk, fk, gradk, pk, xnew, fnew, gradnew, *args, **kwargs):
H = self.hess(xnew, *args, **kwargs)
return xnew, fnew, gradnew, np.linalg.solve(H, -gradnew)
def prepare_initial_step(self, xk, fk, gradk, *args, **kwargs):
H = self.hess(xk, *args, **kwargs)
return np.linalg.solve(H, -gradk)
class QuasiNewton(Newton):
def __init__(self, fun, gradient=None, hess=None, nd={}, wolfe_c1=1e-4, wolfe_c2=0.9,
x_tol=1e-6, f_tol=1e-6, max_iter=50, save_history=False):
        if hess is None:
            # default to a damped BFGS approximation when no update strategy is supplied
            hess = BFGS(exception_strategy="damp_update", min_curvature=0.2)
super().__init__(fun, gradient=gradient, hess=hess, nd=nd, wolfe_c1=wolfe_c1, wolfe_c2=wolfe_c2,
x_tol=x_tol, f_tol=f_tol, max_iter=max_iter, save_history=save_history)
    def prepare_next_step(self, xk, fk, gradk, pk, xnew, fnew, gradnew, *args, **kwargs):
        # update the quasi-Newton approximation with the latest step and gradient change,
        # then solve H p = -grad for the new search direction
        self.hess.update(xnew - xk, gradnew - gradk)
        H = self.hess.get_matrix()
        return xnew, fnew, gradnew, np.linalg.solve(H, -gradnew)

    def prepare_initial_step(self, xk, fk, gradk, *args, **kwargs):
        # the approximation starts as the identity, so the first direction is steepest descent
        self.hess.initialize(xk.shape[0], "hess")
        H = self.hess.get_matrix()
        return np.linalg.solve(H, -gradk)
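

# ---------------------------------------------------------------------------
# Illustrative usage sketch, referenced from the DescentAlgorithm docstring.
# This is a minimal example rather than a prescribed API: it assumes
# scipy.optimize.rosen / rosen_der as a convenient test problem, and the
# max_iter / save_history values are chosen purely for demonstration.
if __name__ == "__main__":
    from scipy.optimize import rosen, rosen_der

    # damped-BFGS quasi-Newton run on the 2-D Rosenbrock function
    solver = QuasiNewton(rosen, gradient=rosen_der, max_iter=100, save_history=True)
    solver.optimize(np.array([-1.2, 1.0]))

    # optimize() stores its outcome on the instance rather than returning it
    print(solver.result["x"], solver.result["f"], solver.result["message"])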