First pass at adding optimized bound constraints
jeffrey-hokanson committed Jan 18, 2019
1 parent 3a67e03 commit 59173b6
Showing 2 changed files with 121 additions and 59 deletions.
141 changes: 95 additions & 46 deletions psdr/polyridge.py
@@ -134,11 +134,36 @@ def two_norm_fit(A,b):
.. math::
\min_{x} \| \mathbf{a} \mathbf{x} - \mathbf{b}\|_2
\min_{x} \| \mathbf{A} \mathbf{x} - \mathbf{b}\|_2
"""
return scipy.linalg.lstsq(A, b)[0]

def bound_fit(A, b, norm = 2):
r""" solve a norm constrained problem
.. math::
\min_{x} \| \mathbf{A}\mathbf{x} - \mathbf{b}\|_p
\text{such that} \mathbf{A} \mathbf{x} -\mathbf{b} \ge 0
"""
with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
x = cp.Variable(A.shape[1])
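# residual A @ x - b; __rmatmul__ spells A @ x without the @ operator, which Python 2 lacks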
residual = x.__rmatmul__(A) - b
if norm == 1:
obj = cp.norm1(residual)
elif norm == 2:
obj = cp.norm(residual)
elif norm == np.inf:
obj = cp.norm_inf(residual)
else:
raise NotImplementedError("norm must be 1, 2, or np.inf")
constraint = [residual >= 0]
problem = cp.Problem(cp.Minimize(obj), constraint)
problem.solve()

# TODO: The solution doesn't obey the constraints for the 1- and inf-norms, but does for the 2-norm.
return x.value
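A minimal standalone sketch of the same one-sided fit, assuming a recent cvxpy and NumPy (A @ x in place of __rmatmul__); the helper name bound_fit_sketch and the random test data are illustrative, not part of this commit. Negating both arguments turns the "model above data" constraint into "model below data":

import cvxpy as cp
import numpy as np

def bound_fit_sketch(A, b, norm=2):
    # min ||A x - b||_p  subject to  A x - b >= 0
    x = cp.Variable(A.shape[1])
    residual = A @ x - b
    cp.Problem(cp.Minimize(cp.norm(residual, norm)), [residual >= 0]).solve()
    return x.value

rng = np.random.default_rng(0)
A = rng.standard_normal((50, 3))
b = A @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.standard_normal(50)
x_lower = bound_fit_sketch(-A, -b)   # enforces A x <= b at every sample
print(np.max(A @ x_lower - b))       # nonpositive up to solver tolerance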


class PolynomialRidgeApproximation(PolynomialRidgeFunction):
@@ -183,6 +208,8 @@ class PolynomialRidgeApproximation(PolynomialRidgeFunction):
scale: bool (default:True)
Scale the coordinates along the ridge to ameliorate ill-conditioning
bound: [None, 'lower', 'upper']
If 'lower' or 'upper', construct an approximation bounding the data from below or above, respectively
References
----------
@@ -192,7 +219,8 @@ class PolynomialRidgeApproximation(PolynomialRidgeFunction):
"""

def __init__(self, degree, subspace_dimension, basis = 'legendre',
norm = 2, n_init = 1, scale = True, keep_data = True, domain = None):
norm = 2, n_init = 1, scale = True, keep_data = True, domain = None,
bound = None):

assert isinstance(degree, int)
assert degree >= 0
@@ -242,6 +270,10 @@ def __init__(self, degree, subspace_dimension, basis = 'legendre',
self.domain = deepcopy(domain)


assert bound in [None, 'lower', 'upper'], "Invalid bound specified"
self.bound = bound


def fit(self, X, fX, U0 = None, **kwargs):
r""" Given samples, fit the polynomial ridge approximation.
@@ -267,12 +299,10 @@ def fit(self, X, fX, U0 = None, **kwargs):


# TODO Implement multiple initializations
if self.norm == 2:
return self._fit_2_norm(X, fX, U0, **kwargs)
elif self.norm == np.inf or self.norm == 1:
if self.norm == 2 and self.bound is None:
return self._fit_varpro(X, fX, U0, **kwargs)
else:
return self._fit_alternating(X, fX, U0, **kwargs)
else:
raise NotImplementedError


################################################################################
@@ -284,12 +314,18 @@ def _fit_affine(self, X, fX):
"""
# TODO: There is often a scaling issue
XX = np.hstack([X, np.ones((X.shape[0],1))])
if self.norm == 1:
b = one_norm_fit(XX, fX)
elif self.norm == 2:
b = two_norm_fit(XX, fX)
elif self.norm == np.inf:
b = inf_norm_fit(XX, fX)
if self.bound is None:
if self.norm == 1:
b = one_norm_fit(XX, fX)
elif self.norm == 2:
b = two_norm_fit(XX, fX)
elif self.norm == np.inf:
b = inf_norm_fit(XX, fX)
elif self.bound == 'lower':
# lower bound: fX >= XX b, matching the sign convention in _fit_coef
b = bound_fit(-XX, -fX, norm = self.norm)
elif self.bound == 'upper':
# upper bound: fX <= XX b
b = bound_fit(XX, fX, norm = self.norm)

U = b[0:-1].reshape(-1,1)
return U
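Why the sign flips implement the bounds: a lower bound asks the model to sit below the data, and negating both arguments puts that requirement into the A x - b >= 0 form that bound_fit enforces:

.. math::

    \mathbf{X}\mathbf{b} \le \mathbf{f}_X
    \iff \mathbf{f}_X - \mathbf{X}\mathbf{b} \ge 0
    \iff (-\mathbf{X})\mathbf{b} - (-\mathbf{f}_X) \ge 0

The 'upper' case passes the unnegated pair, giving X b >= f_X.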
@@ -305,41 +341,54 @@ def _init_U(self, X, fX):
def _fit_coef(self, X, fX, U):
r""" Returns the linear coefficients
"""
self._U = U
self.set_scale(X)
V = self.V(X)
if self.norm == 1:
return one_norm_fit(V, fX)
elif self.norm == 2:
return two_norm_fit(V, fX)
elif self.norm == np.inf:
return inf_norm_fit(V, fX)
#self._U = U
self.set_scale(X, U = U)
V = self.V(X, U = U)
if self.bound is None:
if self.norm == 1:
c = one_norm_fit(V, fX)
elif self.norm == 2:
c = two_norm_fit(V, fX)
elif self.norm == np.inf:
c = inf_norm_fit(V, fX)
else:
raise NotImplementedError
elif self.bound == 'lower':
c = bound_fit(-V, -fX, norm = self.norm)
elif self.bound == 'upper':
c = bound_fit(V, fX, norm = self.norm)
else:
raise NotImplementedError


return c

def _finish(self, X, fX, U):
r""" Given final U, rotate and find coefficients
"""

# Step 1: Apply active subspaces to the profile function at samples X
# to rotate onto the most important directions
if U.shape[1] > 1:
self._U = U
self.coef = self._fit_coef(X, fX, U)
grads = self.profile_grad(X)
Ur = scipy.linalg.svd(grads.T)[0]
U = U.dot(Ur)

# Step 2: Flip signs such that average slope is positive in the coordinate directions
self._U = U
self.coef = self._fit_coef(X, fX, U)
grads = self.profile_grad(X)
U = U.dot(np.diag(np.sign(np.mean(grads, axis = 0))))
self._U = U = U.dot(np.diag(np.sign(np.mean(grads, axis = 0))))

# Step 3: final fit
self.coef = self._fit_coef(X, fX, U)
grads = self.profile_grad(X)
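A small numerical illustration of the rotation in Step 1 (standalone; the random grads array stands in for profile_grad(X)): the left singular vectors of the transposed gradient matrix order directions by how strongly the profile varies along them, so U.dot(Ur) sorts the ridge coordinates from most to least active.

import numpy as np
import scipy.linalg

rng = np.random.default_rng(1)
Q = scipy.linalg.qr(rng.standard_normal((2, 2)))[0]   # random rotation hiding the axes
# stand-in gradients with one strong and one weak direction, then rotated
grads = (rng.standard_normal((500, 2)) * np.array([5.0, 0.1])) @ Q.T
Ur = scipy.linalg.svd(grads.T)[0]   # n x n rotation, ordered by singular value
print(np.std(grads @ Ur, axis=0))   # approximately [5, 0.1]: the active direction is recovered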

################################################################################
# Two norm functions
# VarPro based solution for the 2-norm without bound constraints
################################################################################

def _varpro_residual(self, X, fX, U_flat):
@@ -392,7 +441,7 @@ def _grassmann_trajectory(self, U_flat, Delta_flat, t):
U_new = orth(U_new).flatten()
return U_new

def _fit_2_norm(self, X, fX, U0, **kwargs):
def _fit_varpro(self, X, fX, U0, **kwargs):
if U0 is None:
U0 = self._init_U(X, fX)

@@ -423,13 +472,6 @@ def residual(U_flat):

self._finish(X, fX, U)


def _fit_fixed_U_2_norm(self, X, fX, U):
self.U = orth(U)
self.set_scale(X)
V = self.V(X)
self.coef = scipy.linalg.lstsq(V, fX)[0].flatten()

################################################################################
# Generic residual and Jacobian
################################################################################
@@ -493,12 +535,15 @@ def _trajectory(self, X, fX, U_c, pU_pc, alpha):

# Compute the step along the Geodesic
Y, s, ZT = scipy.linalg.svd(Delta, full_matrices = False, lapack_driver = 'gesvd')
U = np.dot(np.dot(U,ZT.T), np.diag(np.cos(s*alpha))) + np.dot(Y, np.diag(np.sin(s*alpha)))
U_new = np.dot(np.dot(U, ZT.T), np.diag(np.cos(s*alpha))) + np.dot(Y, np.diag(np.sin(s*alpha)))

# TODO: align U and U_new to minimize Frobenius norm error
# right now the small-step termination criterion never triggers because U_new and U have different orientations

# Solve a convex problem to actually compute optimal c
c = self._fit_coef(X, fX, U)
c = self._fit_coef(X, fX, U_new)

return np.hstack([U.flatten(), c.flatten()])
return np.hstack([U_new.flatten(), c.flatten()])
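For reference, the step above is the Grassmann geodesic formula of Edelman, Arias, and Smith (1998): with the thin SVD of the tangent step Delta = Y Sigma Z^T,

.. math::

    \mathbf{U}(\alpha) = \mathbf{U}\mathbf{Z}\cos(\mathbf{\Sigma}\alpha) + \mathbf{Y}\sin(\mathbf{\Sigma}\alpha)

The trailing Z^T of the full formula is dropped here; that only rotates the basis within the subspace, which is also why U and U_new can differ in orientation, as the TODO above notes.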

def _fit_alternating(self, X, fX, U0, **kwargs):
M, m = X.shape
@@ -526,7 +571,7 @@ def jacobian(U_c):
# Initialize parameter values
self.set_scale(X, U0)
c0 = self._fit_coef(X, fX, U0)

print "initial fit"
U_c0 = np.hstack([U0.flatten(), c0])

# Add orthogonality constraints to search direction
@@ -539,39 +584,43 @@ def search_constraints(U_c, pU_pc):
constraints = [ pU_pc[k*m:(k+1)*m].__rmatmul__(U.T) == np.zeros(n) for k in range(n)]
return constraints

# Pass the one-sided bound to the SLP solver: 'lower' caps the residual above by zero, 'upper' below
obj_lb = None
obj_ub = None
if self.bound == 'lower':
obj_ub = np.zeros(fX.shape)
elif self.bound == 'upper':
obj_lb = np.zeros(fX.shape)

# Perform optimization
U_c = sequential_lp(residual, U_c0, jacobian, trajectory = trajectory,
obj_lb = obj_lb, obj_ub = obj_ub,
search_constraints = search_constraints, norm = self.norm, **kwargs)

# Store solution
U = U_c[:m*n].reshape(m,n)
self._finish(X, fX, U)


class PolynomialRidgeBound(PolynomialRidgeFunction):
pass



if __name__ == '__main__':
import matplotlib.pyplot as plt

np.random.seed(3)
p = 3
m = 4
n = 1
M = 500
M = 50

U = orth(np.random.randn(m,n))
coef = np.random.randn(len(LegendreTensorBasis(n,p)))
prf = PolynomialRidgeFunction(LegendreTensorBasis(n,p), coef, U)

X = np.random.randn(M,m)
fX = prf.eval(X)
#fX += 1*np.random.uniform(-1,1, size = fX.shape)
fX += 1*np.random.uniform(-1,1, size = fX.shape)

#U0 = orth(np.random.randn(m,n))
pra = PolynomialRidgeApproximation(degree = p, subspace_dimension = n, norm = np.inf)
pra = PolynomialRidgeApproximation(degree = p, subspace_dimension = n, norm = 2, bound = 'lower')
pra.fit(X, fX, verbose = True)


39 changes: 26 additions & 13 deletions psdr/seqlp.py
@@ -11,7 +11,7 @@
def sequential_lp(f, x0, jac, search_constraints = None,
norm = 2, trajectory = trajectory_linear, obj_lb = None, obj_ub = None,
maxiter = 100, bt_maxiter = 50, domain = None,
tol_dx = 1e-10, c_armijo = 0.01, verbose = False, **kwargs):
tol_dx = 1e-10, tol_obj = 1e-10, verbose = False, **kwargs):
r""" Solves a nonlinear optimization by sequential least squares
@@ -99,9 +99,9 @@ def kkt_norm(fx, jacx):


if verbose:
print 'iter | objective | norm px | TR radius | KKT norm |'
print '-----|-------------------|----------|-----------|----------|'
print '%4d | %+14.10e | | | %8.2e |' % (0, objval, kkt_norm(fx, jacx))
print 'iter | objective | norm px | TR radius | KKT norm | violation |'
print '-----|-------------------|----------|-----------|----------|-----------|'
print '%4d | %+14.10e | | | %8.2e | |' % (0, objval, kkt_norm(fx, jacx))

Delta = 1.

@@ -119,30 +119,31 @@ def kkt_norm(fx, jacx):
elif norm is None: obj = f_lin

# Now setup constraints
constraints = []
nonlinear_constraints = []

# First, constraints on "f"
#if obj_lb is not None:
# constraints.append(obj_lb <= f_lin)
#if obj_ub is not None:
# constraints.append(f_lin <= obj_ub)
if obj_lb is not None:
nonlinear_constraints.append(obj_lb <= f_lin)
if obj_ub is not None:
nonlinear_constraints.append(f_lin <= obj_ub)


# Constraints on the search direction specified by user
constraints += search_constraints(x, p)
search_step_constraints = search_constraints(x, p)

# Append constraints from the domain of x
constraints += domain._build_constraints(p - x)
domain_constraints = domain._build_constraints(p - x)

# TODO: Add additional user specified constraints following scipy.optimize.NonlinearConstraint (?)

stop = False
for it2 in range(bt_maxiter):
trust_region_constraints = [cp.norm(p) <= Delta]
active_constraints = nonlinear_constraints + trust_region_constraints + domain_constraints + search_step_constraints
# Solve for the search direction
with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
problem = cp.Problem(cp.Minimize(obj), constraints + trust_region_constraints)
problem = cp.Problem(cp.Minimize(obj), active_constraints)
problem.solve(**kwargs)

if problem.status in ['infeasible', 'unbounded']:
@@ -161,10 +162,22 @@ def kkt_norm(fx, jacx):
fx_new = np.array(f(x_new))
objval_new = objfun(fx_new)


constraint_violation = 0.
if obj_lb is not None:
I = ~(obj_lb <= fx_new)
constraint_violation += np.linalg.norm((fx_new - obj_lb)[I], 1)
if obj_ub is not None:
I = ~(fx_new <= obj_ub)
constraint_violation += np.linalg.norm((fx_new - obj_ub)[I], 1)

#if objval_new - objval <= c_armijo*(pred_objval_new - objval):
if objval_new < objval:
x = x_new
fx = fx_new

if np.abs(objval_new - objval) < tol_obj:
stop = True
objval = objval_new
Delta = max(1., np.linalg.norm(px))
break
@@ -178,7 +191,7 @@ def kkt_norm(fx, jacx):
jacx = jac(x)

if verbose:
print '%4d | %+14.10e | %8.2e | %8.2e | %8.2e |' % (it+1, objval, np.linalg.norm(px), Delta, kkt_norm(fx, jacx))
print '%4d | %+14.10e | %8.2e | %8.2e | %8.2e | %8.2e |' % (it+1, objval, np.linalg.norm(px), Delta, kkt_norm(fx, jacx), constraint_violation)
if stop:
break

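To make the loop above concrete, here is a minimal single-step sketch of the linearized trust-region subproblem it solves (a hypothetical standalone helper assuming a recent cvxpy; the real sequential_lp adds backtracking, domain constraints, search constraints, and custom trajectories):

import cvxpy as cp
import numpy as np

def slp_step(f, jac, x, Delta, norm=2, obj_ub=None):
    # Linearize the residual at x, then minimize its norm inside a trust region
    fx, J = np.array(f(x)), np.array(jac(x))
    p = cp.Variable(x.shape[0])
    f_lin = fx + J @ p                    # first-order model of f(x + p)
    constraints = [cp.norm(p) <= Delta]   # trust-region constraint
    if obj_ub is not None:                # one-sided bound, e.g. residual <= 0
        constraints.append(f_lin <= obj_ub)
    cp.Problem(cp.Minimize(cp.norm(f_lin, norm)), constraints).solve()
    return x + p.value

# one step on a toy linear residual f(x) = A x - b
A = np.array([[2.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
b = np.array([1.0, 2.0, 3.0])
x1 = slp_step(lambda x: A @ x - b, lambda x: A, np.zeros(2), Delta=1.0)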
