From 34b090f459050375cbc02ad0daa4e9a1979c93ca Mon Sep 17 00:00:00 2001
From: "Jeffrey Hokanson @ Thor"
Date: Mon, 28 Jan 2019 12:07:02 -0700
Subject: [PATCH] Added support for Hessians of polynomial models

---
 Demos/drtb              |  1 -
 psdr/__init__.py        |  3 +-
 psdr/basis.py           | 67 ++++++++++++++++++++++++++++++++++++++++-
 psdr/poly.py            | 12 ++++++--
 psdr/polyridge.py       | 39 +++++++++++++++++++++---
 tests/checkder.py       | 24 +++++++++++++++
 tests/test_basis.py     | 28 ++++++++++++++++-
 tests/test_polyridge.py | 22 +++++++++++++-
 8 files changed, 184 insertions(+), 12 deletions(-)
 delete mode 120000 Demos/drtb

diff --git a/Demos/drtb b/Demos/drtb
deleted file mode 120000
index 057246f..0000000
--- a/Demos/drtb
+++ /dev/null
@@ -1 +0,0 @@
-../drtb/
\ No newline at end of file
diff --git a/psdr/__init__.py b/psdr/__init__.py
index db292ba..bb2a8bb 100644
--- a/psdr/__init__.py
+++ b/psdr/__init__.py
@@ -2,9 +2,10 @@
 from .basis import *
 from .function import *
 from .subspace import *
-from .polyridge import *
 from .gp import *
 from .sample import *
+from .polyridge import *
+from .poly import *
 
 # Optimization
 from .minimax import *
diff --git a/psdr/basis.py b/psdr/basis.py
index d70e35b..ebcea28 100644
--- a/psdr/basis.py
+++ b/psdr/basis.py
@@ -159,6 +159,7 @@ def V(self, X):
 		V: np.array
 			Vandermonde matrix
 		"""
+		X = X.reshape(-1, self.n)
 		X = self._scale(np.array(X))
 		M = X.shape[0]
 		assert X.shape[1] == self.n, "Expected %d dimensions, got %d" % (self.n, X.shape[1])
@@ -192,6 +193,7 @@ def VC(self, X, c):
 		Vc: np.array (M,)
 			Product of Vandermonde matrix and :math:`\mathbf c`
 		"""
+		X = X.reshape(-1, self.n)
 		X = self._scale(np.array(X))
 		M = X.shape[0]
 		c = np.array(c)
@@ -244,6 +246,7 @@ def DV(self, X):
 			Derivative of Vandermonde matrix where :code:`Vp[i,j,:]`
 			is the gradient of :code:`V[i,j]`.
 		"""
+		X = X.reshape(-1, self.n)
 		X = self._scale(np.array(X))
 		M = X.shape[0]
 		V_coordinate = [self.vander(X[:,k], self.p) for k in range(self.n)]
@@ -268,6 +271,66 @@ def DV(self, X):
 				DV[:,:,k] *= dscale[k]
 
 		return DV
+
+	def DDV(self, X):
+		r""" Column-wise second derivative of the Vandermonde matrix
+
+		Given points :math:`\mathbf x_i \in \mathbb{R}^n`,
+		this creates the Vandermonde-like matrix whose entries
+		correspond to the second derivatives of each of the basis elements;
+		i.e.,
+
+		.. math::
+
+			[\mathbf{V}]_{i,j,k,\ell} = \left. \frac{\partial^2}{\partial x_k \partial x_\ell} \psi_j(\mathbf{x})
+				\right|_{\mathbf{x} = \mathbf{x}_i}.
+
+		Parameters
+		----------
+		X: array-like (M, n)
+			Points at which to evaluate the basis, where :code:`X[i]` is one such point in
+			:math:`\mathbb{R}^n`.
+
+		Returns
+		-------
+		Vpp: np.array (M, N, n, n)
+			Second derivative of the Vandermonde matrix where :code:`Vpp[i,j,:,:]`
+			is the Hessian of :code:`V[i,j]`.
+		"""
+		X = X.reshape(-1, self.n)
+		X = self._scale(np.array(X))
+		M = X.shape[0]
+		V_coordinate = [self.vander(X[:,k], self.p) for k in range(self.n)]
+
+		N = len(self.indices)
+		DDV = np.ones((M, N, self.n, self.n), dtype = X.dtype)
+
+		try:
+			dscale = self._dscale()
+		except NotImplementedError:
+			dscale = np.ones(X.shape[1])
+
+
+		for k in range(self.n):
+			for ell in range(k, self.n):
+				for j, alpha in enumerate(self.indices):
+					for q in range(self.n):
+						if q == k == ell:
+							# We need the second derivative
+							eq = np.zeros(self.p+1)
+							eq[alpha[q]] = 1.
+							der2 = self.der(eq, 2)
+							#print "q", q, "der2", der2, eq
+							DDV[:,j,k,ell] *= V_coordinate[q][:,0:-2].dot(der2)
+						elif q == k or q == ell:
+							DDV[:,j,k,ell] *= np.dot(V_coordinate[q][:,0:-1], self.Dmat[alpha[q],:])
+						else:
+							DDV[:,j,k,ell] *= V_coordinate[q][:,alpha[q]]
+
+				# Correct for transform
+				DDV[:,:,k, ell] *= dscale[k]*dscale[ell]
+				DDV[:,:,ell, k] = DDV[:,:,k, ell]
+		return DDV
 
 
 class MonomialTensorBasis(PolynomialTensorBasis):
@@ -320,4 +383,6 @@ def _dscale(self):
 		raise NotImplementedError
 
 
-
+if __name__ == '__main__':
+	basis = MonomialTensorBasis(2,5)
+	X = np.random.randn(1,2)
diff --git a/psdr/poly.py b/psdr/poly.py
index df9bb50..5356418 100644
--- a/psdr/poly.py
+++ b/psdr/poly.py
@@ -8,6 +8,7 @@
 from basis import *
 from function import BaseFunction
 
+__all__ = ['PolynomialFunction', 'PolynomialApproximation']
 
 
 def linear_fit(A, b, norm = 2, bound = None):
@@ -39,16 +40,20 @@ class PolynomialFunction(BaseFunction):
+	def __init__(self, dimension, degree, coef):
+		self.basis = LegendreTensorBasis(dimension, degree)
+		self.coef = coef
 
 	def eval(self, X):
 		V = self.basis.V(X)
 		return V.dot(self.coef)
 
 	def grad(self, X):
-		pass
+		return np.tensordot(self.basis.DV(X), self.coef, axes = (1,0))
 
 	def hessian(self, X):
-		pass
+		return np.tensordot(self.basis.DDV(X), self.coef, axes = (1,0))
+
 
 class PolynomialApproximation(PolynomialFunction):
 	def __init__(self, degree, basis = 'legendre', norm = 2, bound = None):
@@ -99,4 +104,5 @@ def fit(self, X, fX):
 	poly = PolynomialApproximation(degree = 2)
 	poly.fit(X, fX)
 
-	print poly.eval(X) - fX
+	print poly.eval(X).shape
+	print poly.grad(X).shape
diff --git a/psdr/polyridge.py b/psdr/polyridge.py
index 74d2e37..b4bc1ab 100644
--- a/psdr/polyridge.py
+++ b/psdr/polyridge.py
@@ -48,19 +48,49 @@ def DV(self, X, U = None):
 		Y = U.T.dot(X.T).T
 		return self.basis.DV(Y)
 
+	def DDV(self, X, U = None):
+		if U is None: U = self.U
+		Y = U.T.dot(X.T).T
+		return self.basis.DDV(Y)
+
 	def eval(self, X):
-		V = self.V(X)
-		return V.dot(self.coef)
+		if len(X.shape) == 1:
+			return self.V(X.reshape(1,-1)).dot(self.coef).reshape(1)
+		else:
+			return self.V(X).dot(self.coef)
 
 	def grad(self, X):
-		#Y = np.dot(U.T, X.T).T
+		if len(X.shape) == 1:
+			one_d = True
+			X = X.reshape(1,-1)
+		else:
+			one_d = False
 		DV = self.DV(X)
 		# Compute gradient on projected space
 		Df = np.tensordot(DV, self.coef, axes = (1,0))
 		# Inflate back to whole space
 		Df = Df.dot(self.U.T)
-		return Df
+		if one_d:
+			return Df.reshape(X.shape[1])
+		else:
+			return Df
+
+	def hessian(self, X):
+		if len(X.shape) == 1:
+			one_d = True
+			X = X.reshape(1,-1)
+		else:
+			one_d = False
+
+		DDV = self.DDV(X)
+		DDf = np.tensordot(DDV, self.coef, axes = (1,0))
+		# Inflate back to proper dimensions
+		DDf = np.tensordot(np.tensordot(DDf, self.U, axes = (2,1)) , self.U, axes = (1,1))
+		if one_d:
+			return DDf.reshape(X.shape[1], X.shape[1])
+		else:
+			return DDf
 
 
 	def profile_grad(self, X):
 		r""" gradient of the profile function g
@@ -623,6 +653,7 @@ def search_constraints(U_c, pU_pc):
 	coef = np.random.randn(len(LegendreTensorBasis(n,p)))
 	prf = PolynomialRidgeFunction(LegendreTensorBasis(n,p), coef, U)
 
+	X = np.random.randn(M,m)
 	fX = prf.eval(X)
 	fX += 1*np.random.uniform(-1,1, size = fX.shape)
 
diff --git a/tests/checkder.py b/tests/checkder.py
index a78ee89..daba0a0 100644
--- a/tests/checkder.py
+++ b/tests/checkder.py
@@ -23,6 +23,7 @@ def check_jacobian(x, residual, jacobian):
 	return max_err
 
 
+
 def check_gradient(x, residual, jacobian):
 	n = x.shape[0]
 	hvec = np.logspace(-14,-1,100)
@@ -56,6 +57,29 @@ def check_derivative(x, obj, grad):
 	return max_err
 
 
+
+def check_hessian(x, obj, hess):
+	n = x.shape[0]
+	H = hess(x)
+
+	hvec = np.logspace(-7,-1,10)
+	max_err = 0.
+
+	for i in range(n):
+		ei = np.zeros(n)
+		ei[i] = 1.
+		for j in range(n):
+			ej = np.zeros(n)
+			ej[j] = 1.
+			min_err = np.inf
+			for h in hvec:
+				Hij_est = ( (obj(x + h*ei + h*ej) - obj(x - h*ei + h*ej)) - (obj(x + h*ei - h*ej) - obj(x - h*ei - h*ej)) )/(4*h**2)
+				err = np.max(np.abs(Hij_est - H[i,j]))
+				min_err = np.min([min_err, err])
+			print "%d %d %5.2e : %5.2e %5.2e" % (i,j,min_err, H[i,j], Hij_est)
+			max_err = max(min_err, max_err)
+	return max_err
+
 #if __name__ == '__main__':
 #	z = np.exp(2j*np.pi*np.linspace(0,1, 1000, endpoint = False))
 #	h = np.tan(64*z)
diff --git a/tests/test_basis.py b/tests/test_basis.py
index e22faa9..e9d2053 100644
--- a/tests/test_basis.py
+++ b/tests/test_basis.py
@@ -6,7 +6,7 @@
 	LaguerreTensorBasis, HermiteTensorBasis)
 
 
-from checkder import check_jacobian
+from checkder import check_jacobian, check_hessian
 
 
 def test_equivalence(m = 3, p = 5):
@@ -52,3 +52,29 @@ def test_der(m = 3, p = 5, M = 10):
 		grad = lambda x: basis.DV(x.reshape(1,-1)).reshape(-1, m)
 		assert check_jacobian(X[0], obj, grad) < 1e-7
 
+def test_hessian(m = 2, p = 5):
+	X = np.random.randn(10, m)
+
+
+	bases = [MonomialTensorBasis(m, p),
+		LegendreTensorBasis(m, p),
+		ChebyshevTensorBasis(m, p),
+		LaguerreTensorBasis(m, p),
+		HermiteTensorBasis(m, p),
+		]
+
+	for basis in bases:
+		for i in range(len(basis)):
+			print "i", i
+			obj = lambda x: basis.V(x.reshape(1,-1))[0,i]
+			hess = lambda x: basis.DDV(x.reshape(1,-1))[0,i]
+			assert check_hessian(X[0], obj, hess) < 1e-5
+
+
+		basis.set_scale(X)
+		for i in range(len(basis)):
+			print "i", i
+			obj = lambda x: basis.V(x.reshape(1,-1))[0,i]
+			hess = lambda x: basis.DDV(x.reshape(1,-1))[0,i]
+			assert check_hessian(X[0], obj, hess) < 1e-5
+
diff --git a/tests/test_polyridge.py b/tests/test_polyridge.py
index 39bab5f..1d52803 100644
--- a/tests/test_polyridge.py
+++ b/tests/test_polyridge.py
@@ -1,9 +1,29 @@
 import numpy as np
 import scipy.linalg
 from psdr import PolynomialRidgeApproximation, LegendreTensorBasis, PolynomialRidgeFunction
-from checkder import check_jacobian
+from checkder import *
 
+def test_polyridge_der():
+	m = 5
+	n = 1
+	p = 3
+
+	U = scipy.linalg.orth(np.random.randn(m,n))
+	coef = np.random.randn(len(LegendreTensorBasis(n,p)))
+	prf = PolynomialRidgeFunction(LegendreTensorBasis(n,p), coef, U)
+
+	x = np.random.randn(m)
+
+	print prf.eval(x)
+	print prf.grad(x)
+	print prf.hessian(x)
+
+	assert check_derivative(x, prf.eval, lambda x: prf.grad(x) ) < 1e-7
+	assert check_hessian(x, prf.eval, lambda x: prf.hessian(x) ) < 1e-5
+
+
+
 def test_varpro_jacobian():
 	np.random.seed(1)
 	M = 100
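For reference, the step-size sweep in check_hessian above estimates each Hessian entry with the standard four-point central-difference stencil

    \frac{\partial^2 f}{\partial x_i \partial x_j}(x) \approx
    \frac{f(x + h e_i + h e_j) - f(x - h e_i + h e_j) - f(x + h e_i - h e_j) + f(x - h e_i - h e_j)}{4 h^2},

whose truncation error is O(h^2). The inner loop keeps the smallest disagreement with the analytic Hessian over the sweep of h, balancing truncation against round-off error, which is presumably why the Hessian asserts use the looser 1e-5 tolerance rather than the 1e-7 used for the gradient checks.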
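The hessian method added to the ridge class above implements the chain rule for f(x) = g(U^T x): the profile Hessian assembled from DDV is inflated back to the ambient space as U Hess g(U^T x) U^T by the two tensordot calls. Below is a minimal sketch (not part of the patch) that checks this identity numerically; it assumes this branch of psdr is importable and uses only names that appear in the diff (PolynomialRidgeFunction, LegendreTensorBasis, and the new DDV and hessian methods).

    import numpy as np
    import scipy.linalg
    from psdr import PolynomialRidgeFunction, LegendreTensorBasis

    # Random ridge function f(x) = g(U^T x) with a degree-3 Legendre profile g
    m, n, p = 5, 2, 3
    U = scipy.linalg.orth(np.random.randn(m, n))
    coef = np.random.randn(len(LegendreTensorBasis(n, p)))
    prf = PolynomialRidgeFunction(LegendreTensorBasis(n, p), coef, U)

    x = np.random.randn(m)
    H = prf.hessian(x)            # ambient-space Hessian, shape (m, m)

    # Hessian of the profile g at the projected point, assembled from DDV
    Hg = np.tensordot(prf.DDV(x.reshape(1, -1)), coef, axes=(1, 0))[0]   # shape (n, n)

    # Chain rule: Hess f(x) = U Hess g(U^T x) U^T; both residuals should be near machine precision
    print(np.max(np.abs(H - U.dot(Hg).dot(U.T))))
    print(np.max(np.abs(H - H.T)))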