Added support for Hessians of polynomial models
jeffrey-hokanson committed Jan 28, 2019
1 parent bd2bcff commit 34b090f
Showing 8 changed files with 184 additions and 12 deletions.
1 change: 0 additions & 1 deletion Demos/drtb

This file was deleted.

3 changes: 2 additions & 1 deletion psdr/__init__.py
@@ -2,9 +2,10 @@
from .basis import *
from .function import *
from .subspace import *
from .polyridge import *
from .gp import *
from .sample import *
from .polyridge import *
from .poly import *

# Optimization
from .minimax import *
67 changes: 66 additions & 1 deletion psdr/basis.py
@@ -159,6 +159,7 @@ def V(self, X):
V: np.array
Vandermonde matrix
"""
X = X.reshape(-1, self.n)
X = self._scale(np.array(X))
M = X.shape[0]
assert X.shape[1] == self.n, "Expected %d dimensions, got %d" % (self.n, X.shape[1])
@@ -192,6 +193,7 @@ def VC(self, X, c):
Vc: np.array (M,)
Product of Vandermonde matrix and :math:`\mathbf c`
"""
X = X.reshape(-1, self.n)
X = self._scale(np.array(X))
M = X.shape[0]
c = np.array(c)
@@ -244,6 +246,7 @@ def DV(self, X):
Derivative of Vandermonde matrix where :code:`Vp[i,j,:]`
is the gradient of :code:`V[i,j]`.
"""
X = X.reshape(-1, self.n)
X = self._scale(np.array(X))
M = X.shape[0]
V_coordinate = [self.vander(X[:,k], self.p) for k in range(self.n)]
@@ -268,6 +271,66 @@ def DV(self, X):
DV[:,:,k] *= dscale[k]

return DV

def DDV(self, X):
r""" Column-wise second derivative of the Vandermonde matrix
Given points :math:`\mathbf x_i \in \mathbb{R}^n`,
this creates the Vandermonde-like matrix whose entries
correspond to the second derivatives of each of the basis elements;
i.e.,
.. math::
[\mathbf{V}]_{i,j,k,\ell} = \left. \frac{\partial^2}{\partial x_k \partial x_\ell} \psi_j(\mathbf{x})
\right|_{\mathbf{x} = \mathbf{x}_i}.
Parameters
----------
X: array-like (M, n)
Points at which to evaluate the basis, where :code:`X[i]` is one such point in
:math:`\mathbb{R}^n`.
Returns
-------
Vpp: np.array (M, N, n, n)
Second derivative of Vandermonde matrix where :code:`Vpp[i,j,:,:]`
is the Hessian of :code:`V[i,j]`.
"""
X = X.reshape(-1, self.n)
X = self._scale(np.array(X))
M = X.shape[0]
V_coordinate = [self.vander(X[:,k], self.p) for k in range(self.n)]

N = len(self.indices)
DDV = np.ones((M, N, self.n, self.n), dtype = X.dtype)

try:
dscale = self._dscale()
except NotImplementedError:
dscale = np.ones(X.shape[1])


for k in range(self.n):
for ell in range(k, self.n):
for j, alpha in enumerate(self.indices):
for q in range(self.n):
if q == k == ell:
# We need the second derivative
eq = np.zeros(self.p+1)
eq[alpha[q]] = 1.
der2 = self.der(eq, 2)
#print "q", q, "der2", der2, eq
DDV[:,j,k,ell] *= V_coordinate[q][:,0:-2].dot(der2)
elif q == k or q == ell:
DDV[:,j,k,ell] *= np.dot(V_coordinate[q][:,0:-1], self.Dmat[alpha[q],:])
else:
DDV[:,j,k,ell] *= V_coordinate[q][:,alpha[q]]

# Correct for transform
DDV[:,:,k, ell] *= dscale[k]*dscale[ell]
DDV[:,:,ell, k] = DDV[:,:,k, ell]
return DDV


class MonomialTensorBasis(PolynomialTensorBasis):
@@ -320,4 +383,6 @@ def _dscale(self):
raise NotImplementedError



if __name__ == '__main__':
basis = MonomialTensorBasis(2,5)
X = np.random.randn(1,2)
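The new DDV method applies the product rule coordinate by coordinate: the (k, l) entry of the Hessian of a tensor-product basis element differentiates the factors in coordinates k and l (twice in the same coordinate when k = l) and leaves the remaining factors untouched, which is what the loop over q above does. A minimal sketch of checking DDV against a central difference, assuming the LegendreTensorBasis constructor and the V/DDV signatures shown in this diff:

import numpy as np
from psdr import LegendreTensorBasis

basis = LegendreTensorBasis(2, 3)               # 2 variables, total degree 3
x = np.random.randn(2)

# DDV returns an (M, N, n, n) tensor: one n-by-n Hessian per point and basis element
H = basis.DDV(x.reshape(1, -1))[0]              # shape (len(basis), 2, 2)

# Mixed partial of basis element j via a second-order central difference
j, h = 4, 1e-5
e0, e1 = np.eye(2)
fd = (basis.V((x + h*e0 + h*e1).reshape(1, -1))[0, j]
      - basis.V((x - h*e0 + h*e1).reshape(1, -1))[0, j]
      - basis.V((x + h*e0 - h*e1).reshape(1, -1))[0, j]
      + basis.V((x - h*e0 - h*e1).reshape(1, -1))[0, j]) / (4*h**2)
print(abs(fd - H[j, 0, 1]))                     # should be small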
12 changes: 9 additions & 3 deletions psdr/poly.py
@@ -8,6 +8,7 @@
from basis import *
from function import BaseFunction

__all__ = ['PolynomialFunction', 'PolynomialApproximation']


def linear_fit(A, b, norm = 2, bound = None):
@@ -39,16 +40,20 @@ def linear_fit(A, b, norm = 2, bound = None):


class PolynomialFunction(BaseFunction):
def __init__(self, dimension, degree, coef):
self.basis = LegendreTensorBasis(dimension, degree)
self.coef = coef

def eval(self, X):
V = self.basis.V(X)
return V.dot(self.coef)

def grad(self, X):
pass
return np.tensordot(self.basis.DV(X), self.coef, axes = (1,0))

def hessian(self, X):
pass
return np.tensordot(self.basis.DDV(X), self.coef, axes = (1,0))


class PolynomialApproximation(PolynomialFunction):
def __init__(self, degree, basis = 'legendre', norm = 2, bound = None):
@@ -99,4 +104,5 @@ def fit(self, X, fX):

poly = PolynomialApproximation(degree = 2)
poly.fit(X, fX)
print poly.eval(X) - fX
print poly.eval(X).shape
print poly.grad(X).shape
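With grad and hessian filled in, a fitted PolynomialApproximation now exposes first and second derivatives of the surrogate. A hedged usage sketch, assuming the fit/eval interface shown above; fitting the exact quadratic f(x) = x^T x with a degree-2 model should recover a Hessian close to 2*I at every point:

import numpy as np
from psdr import PolynomialApproximation

np.random.seed(0)
X = np.random.randn(100, 3)
fX = np.sum(X**2, axis=1)              # f(x) = x^T x, so the true Hessian is 2*I

poly = PolynomialApproximation(degree=2)
poly.fit(X, fX)

print(poly.eval(X).shape)              # (100,)   one value per row of X
print(poly.grad(X).shape)              # (100, 3) one gradient per row of X
print(poly.hessian(X)[0])              # approximately 2 * np.eye(3)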
39 changes: 35 additions & 4 deletions psdr/polyridge.py
@@ -48,19 +48,49 @@ def DV(self, X, U = None):
Y = U.T.dot(X.T).T
return self.basis.DV(Y)

def DDV(self, X, U = None):
if U is None: U = self.U
Y = U.T.dot(X.T).T
return self.basis.DDV(Y)

def eval(self, X):
V = self.V(X)
return V.dot(self.coef)
if len(X.shape) == 1:
return self.V(X.reshape(1,-1)).dot(self.coef).reshape(1)
else:
return self.V(X).dot(self.coef)

def grad(self, X):
#Y = np.dot(U.T, X.T).T
if len(X.shape) == 1:
one_d = True
X = X.reshape(1,-1)
else:
one_d = False

DV = self.DV(X)
# Compute gradient on projected space
Df = np.tensordot(DV, self.coef, axes = (1,0))
# Inflate back to whole space
Df = Df.dot(self.U.T)
return Df
if one_d:
return Df.reshape(X.shape[1])
else:
return Df

def hessian(self, X):
if len(X.shape) == 1:
one_d = True
X = X.reshape(1,-1)
else:
one_d = False

DDV = self.DDV(X)
DDf = np.tensordot(DDV, self.coef, axes = (1,0))
# Inflate back to proper dimensions
DDf = np.tensordot(np.tensordot(DDf, self.U, axes = (2,1)) , self.U, axes = (1,1))
if one_d:
return DDf.reshape(X.shape[1], X.shape[1])
else:
return DDf

def profile_grad(self, X):
r""" gradient of the profile function g
@@ -623,6 +653,7 @@ def search_constraints(U_c, pU_pc):
coef = np.random.randn(len(LegendreTensorBasis(n,p)))
prf = PolynomialRidgeFunction(LegendreTensorBasis(n,p), coef, U)


X = np.random.randn(M,m)
fX = prf.eval(X)
fX += 1*np.random.uniform(-1,1, size = fX.shape)
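For a ridge function f(x) = g(U^T x), the chain rule gives grad f(x) = U grad g(U^T x) and Hess f(x) = U Hess g(U^T x) U^T, which is what the pair of tensordot calls in hessian implements. A hedged sketch, using only constructors that appear in this diff, of two consequences worth spot-checking: the ambient Hessian is symmetric and its rank is at most the subspace dimension n:

import numpy as np
import scipy.linalg
from psdr import PolynomialRidgeFunction, LegendreTensorBasis

np.random.seed(0)
m, n, p = 5, 2, 3
U = scipy.linalg.orth(np.random.randn(m, n))
coef = np.random.randn(len(LegendreTensorBasis(n, p)))
prf = PolynomialRidgeFunction(LegendreTensorBasis(n, p), coef, U)

x = np.random.randn(m)
H = prf.hessian(x)                     # (m, m) for a single point

print(H.shape)                         # (5, 5)
print(np.linalg.norm(H - H.T))         # ~0: symmetric
print(np.linalg.matrix_rank(H))        # at most n = 2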
24 changes: 24 additions & 0 deletions tests/checkder.py
@@ -23,6 +23,7 @@ def check_jacobian(x, residual, jacobian):

return max_err


def check_gradient(x, residual, jacobian):
n = x.shape[0]
hvec = np.logspace(-14,-1,100)
@@ -56,6 +57,29 @@ def check_derivative(x, obj, grad):

return max_err


def check_hessian(x, obj, hess):
n = x.shape[0]
H = hess(x)

hvec = np.logspace(-7,-1,10)
max_err = 0.

for i in range(n):
ei = np.zeros(n)
ei[i] = 1.
for j in range(n):
ej = np.zeros(n)
ej[j] = 1.
min_err = np.inf
for h in hvec:
Hij_est = ( (obj(x + h*ei + h*ej) - obj(x - h*ei + h*ej)) - (obj(x + h*ei - h*ej) - obj(x - h*ei - h*ej)) )/(4*h**2)
err = np.max(np.abs(Hij_est - H[i,j]))
min_err = np.min([min_err, err])
print "%d %d %5.2e : %5.2e %5.2e" % (i,j,min_err, H[i,j], Hij_est)
max_err = max(min_err, max_err)
return max_err

#if __name__ == '__main__':
# z = np.exp(2j*np.pi*np.linspace(0,1, 1000, endpoint = False))
# h = np.tan(64*z)
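check_hessian estimates each entry H[i,j] with the mixed central difference (f(x+h*ei+h*ej) - f(x-h*ei+h*ej) - f(x+h*ei-h*ej) + f(x-h*ei-h*ej)) / (4*h^2) and keeps the smallest error over a range of step sizes h. A hedged sketch of calling it on a quadratic with a known, constant Hessian (assumes it is run from the tests directory so that checkder is importable):

import numpy as np
from checkder import check_hessian

np.random.seed(0)
n = 3
A = np.random.randn(n, n)
A = 0.5*(A + A.T)                      # symmetric, so the Hessian of f is exactly A

f = lambda x: 0.5*x.dot(A).dot(x)      # f(x) = 1/2 x^T A x
hess = lambda x: A                     # constant Hessian, independent of x

x = np.random.randn(n)
print(check_hessian(x, f, hess))       # largest per-entry finite-difference error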
28 changes: 27 additions & 1 deletion tests/test_basis.py
@@ -6,7 +6,7 @@
LaguerreTensorBasis,
HermiteTensorBasis)

from checkder import check_jacobian
from checkder import check_jacobian, check_hessian


def test_equivalence(m = 3, p = 5):
@@ -52,3 +52,29 @@ def test_der(m = 3, p = 5, M = 10):
grad = lambda x: basis.DV(x.reshape(1,-1)).reshape(-1, m)
assert check_jacobian(X[0], obj, grad) < 1e-7

def test_hessian(m = 2, p = 5):
X = np.random.randn(10, m)


bases = [MonomialTensorBasis(m, p),
LegendreTensorBasis(m, p),
ChebyshevTensorBasis(m, p),
LaguerreTensorBasis(m, p),
HermiteTensorBasis(m, p),
]

for basis in bases:
for i in range(len(basis)):
print "i", i
obj = lambda x: basis.V(x.reshape(1,-1))[0,i]
hess = lambda x: basis.DDV(x.reshape(1,-1))[0,i]
assert check_hessian(X[0], obj, hess) < 1e-5


basis.set_scale(X)
for i in range(len(basis)):
print "i", i
obj = lambda x: basis.V(x.reshape(1,-1))[0,i]
hess = lambda x: basis.DDV(x.reshape(1,-1))[0,i]
assert check_hessian(X[0], obj, hess) < 1e-5

22 changes: 21 additions & 1 deletion tests/test_polyridge.py
@@ -1,9 +1,29 @@
import numpy as np
import scipy.linalg
from psdr import PolynomialRidgeApproximation, LegendreTensorBasis, PolynomialRidgeFunction
from checkder import check_jacobian
from checkder import *


def test_polyridge_der():
m = 5
n = 1
p = 3

U = scipy.linalg.orth(np.random.randn(m,n))
coef = np.random.randn(len(LegendreTensorBasis(n,p)))
prf = PolynomialRidgeFunction(LegendreTensorBasis(n,p), coef, U)

x = np.random.randn(m)

print prf.eval(x)
print prf.grad(x)
print prf.hessian(x)

assert check_derivative(x, prf.eval, lambda x: prf.grad(x) ) < 1e-7
assert check_hessian(x, prf.eval, lambda x: prf.hessian(x) ) < 1e-5



def test_varpro_jacobian():
np.random.seed(1)
M = 100
