In [None]:
%autosave 0

# Testing non-linear solvers on the Rosenbrock function

## Problem definition

In this notebook we show how to set a user-defined objective function and minimize it using different solvers.
The function under study is the well-known non-convex Rosenbrock function. Its analytical form for the 2D case is the following:
\begin{equation}
\phi(x,y) = (1-x)^2 + 100 (y-x^2)^2,
\end{equation}
in which the unique global minimum is at $x=y=1$. The global minimum is inside a long, narrow, parabolic-shaped flat valley. To find the valley is trivial. To converge to the global minimum, however, is difficult. Hence, this function represents a good testing case for any non-linear optimization scheme.

In [None]:
#Loading necessary modules
import sys
sys.path.insert(0, "../python")
import pyVector as Vec
import pyOperator as Op
import pyProblem as Prblm
from pyStopper import BasicStopper as Stopper
from pyStepper import ParabolicStep as StepperPar
from pyStepper import CvSrchStep as StepperMT
from pyNonLinearSolver import NLCGsolver as NLCG
from pyNonLinearSolver import LBFGSsolver as LBFGS
import numpy as np
#Plotting library
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
%matplotlib inline
# Global matplotlib settings applied to every figure of this notebook.
# Image rendering and output-resolution options:
params = {
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
    'savefig.dpi': 300,  # to adjust notebook inline plot size
}
# Every text element (labels, titles, legend, ticks) shares the same font size of 12
# (labels, base font and legend were 10 previously).
params.update({
    size_key: 12
    for size_key in (
        'axes.labelsize',
        'axes.titlesize',
        'font.size',
        'legend.fontsize',
        'xtick.labelsize',
        'ytick.labelsize',
    )
})
matplotlib.rcParams.update(params)

Let's first define the problem object. Our model vector is going to be $\mathbf{m} = [x \,, \, y]^T$. Since the library assumes that the objective function is written in terms of some residual vector (i.e., $\phi(\mathbf{r}(\mathbf{m}))$), we will create a residual vector containing the objective function value as a single scalar.

In [None]:
class Rosenbrock_prblm(Prblm.Problem):
    """
       Rosenbrock function inverse problem
       f(x,y) = (1 - x)^2 + 100*(y - x^2)^2
       m = [x y]'
       res = objective function value (stored in a one-element vector)
    """

    def __init__(self, x_initial, y_initial):
        """Constructor of the non-linear Rosenbrock problem

        x_initial, y_initial: coordinates of the starting model m = [x y]'
        """
        # Setting the bounds (none are used here)
        super(Rosenbrock_prblm, self).__init__(None, None)
        # Setting initial model; the floating-point dtype is forced so that integer
        # inputs (e.g., -1) do not create integer model/gradient arrays, which would
        # silently truncate the values written by gradf
        self.model = Vec.vectorIC(np.array((x_initial, y_initial), dtype=float))
        self.dmodel = self.model.clone()
        self.dmodel.zero()
        # Gradient vector
        self.grad = self.dmodel.clone()
        # Residual vector (single entry holding the objective function value)
        self.res = Vec.vectorIC(np.array((0.,)))
        # Dresidual vector
        self.dres = self.res.clone()
        # Setting default variables
        self.setDefaults()
        self.linear = False
        return

    @staticmethod
    def _gradient_components(m):
        """Return (d f/d x, d f/d y) of the Rosenbrock function at m = [x y]'"""
        x, y = m[0], m[1]
        dev = y - x * x  # distance from the parabola y = x^2
        dfdx = - 2.0 * (1.0 - x) - 400.0 * x * dev
        dfdy = 200.0 * dev
        return dfdx, dfdy

    def objf(self, model):
        """Objective function computation

        The objective value is the single entry of the residual vector written by
        resf; the argument passed to this method is not read.
        NOTE(review): this relies on resf having been evaluated for the current
        model before objf is called -- confirm against Prblm.Problem.
        """
        return self.res.getNdArray()[0]

    def resf(self, model):
        """Residual function: stores f(x,y) in the one-element residual vector and returns it"""
        m = model.getNdArray()  # Getting ndArray of the model
        x, y = m[0], m[1]
        dev = y - x * x
        self.res.getNdArray()[0] = (1.0 - x) * (1.0 - x) + 100.0 * dev * dev
        return self.res

    def gradf(self, model, res):
        """Gradient computation: fills and returns the gradient vector [df/dx df/dy]'

        The res argument is not read; the gradient depends on the model only.
        """
        m = model.getNdArray()  # Getting ndArray of the model
        dfdx, dfdy = self._gradient_components(m)
        grad = self.grad.getNdArray()
        grad[0] = dfdx
        grad[1] = dfdy
        return self.grad

    def dresf(self, model, dmodel):
        """Linear variation of the objective function value (gradient dotted with dmodel)"""
        m = model.getNdArray()  # Getting ndArray of the model
        dm = dmodel.getNdArray()  # Getting ndArray of the model perturbation
        dfdx, dfdy = self._gradient_components(m)
        self.dres.getNdArray()[0] = dfdx * dm[0] + dfdy * dm[1]
        return self.dres

### Instantiation of the inverse problem and of the various solvers

In [None]:
# Common starting point (x, y) shared by all the optimization tests
x_init, y_init = -1.0, -1.0
# Rosenbrock problem object built at the starting point
Ros_prob = Rosenbrock_prblm(x_init, y_init)

Before running any inversion, let's compute the objective function for different values of $x$ and $y$. This step will be useful when we want to plot the optimization path taken by the various tested algorithms.

In [None]:
# Sampling the objective function on a regular (x, y) grid for plotting
x_samples = np.linspace(-1.5, 1.5, 1000)
y_samples = np.linspace(3, -1.5, 1000)  # descending: from 3 down to -1.5
# Container of the sampled values, indexed as [ix, iy]
obj_ros = Vec.vectorIC(np.zeros((len(x_samples), len(y_samples))))
obj_ros_np = obj_ros.getNdArray()
# Scratch model vector whose array is overwritten in place at every grid point
# (relies on getNdArray() exposing the storage read by get_obj, as the writes below imply)
model_test = Vec.vectorIC(np.array((0.0, 0.0)))
model_test_np = model_test.getNdArray()
for ix, x_value in enumerate(x_samples):
    for iy, y_value in enumerate(y_samples):
        model_test_np[:] = (x_value, y_value)
        obj_ros_np[ix, iy] = Ros_prob.get_obj(model_test)

First we test a non-linear conjugate-gradient method in which a parabolic stepper with three-point interpolation is used. 

In [None]:
# Maximum number of iterations allowed to the solver
niter = 1000
# NOTE(review): tolr/tolg are presumably residual/gradient tolerances, set here to a
# tiny value (1e-32) -- confirm their exact meaning against pyStopper.BasicStopper
Stop = Stopper(niter=niter, tolr=1e-32, tolg=1e-32)
NLCGsolver = NLCG(Stop)
Ros_prob = Rosenbrock_prblm(x_init, y_init)  # Resetting the problem
# Ask the solver to keep the objective values and the models it visits
NLCGsolver.setDefaults(save_obj=True, save_model=True)
NLCGsolver.run(Ros_prob, verbose=True)
# Collecting the coordinates of the saved models for plotting.
# Comprehensions are used so that no loop variable named `iter` is left in the
# notebook namespace shadowing the builtin.
x_smpld = [saved_model.getNdArray()[0] for saved_model in NLCGsolver.model]
y_smpld = [saved_model.getNdArray()[1] for saved_model in NLCGsolver.model]

Let's plot the optimization path taken by the algorithm, which converged to the global minimum in 199 iterations using a parabolic stepper.

In [None]:
fig, ax = plt.subplots()
# Optimization path: one marker per saved model, joined by a dashed line
ax.scatter(x_smpld, y_smpld, color='red', s=50, marker="+")
ax.plot(x_smpld, y_smpld, "--", color='red')
# Background image of the objective function. obj_ros_np.T has rows running along y
# (from 3.0 down to -1.5) and columns along x (from -1.5 up to 1.5); with the default
# 'upper' origin, element [0, 0] is drawn at (left, top), so the extent must be
# [-1.5, 1.5, -1.5, 3.0]. A reversed x extent followed by invert_xaxis() would draw
# the image mirrored in x with respect to the contours, the path and the tick labels.
im = ax.imshow(obj_ros_np.T, cmap='jet', vmin=0.0, vmax=600, extent=[-1.5, 1.5, -1.5, 3.0])
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.grid()
# Contour lines; here the extent is (x0, x1, y0, y1) with (x0, y0) the position of element [0, 0]
cs = ax.contour(obj_ros_np.T, levels=[0.05, 0.1, 0.5, 2, 10, 50, 125, 250, 500, 1000], extent=[-1.5, 1.5, 3.0, -1.5],
                colors="white", linewidths=(0.8,), linestyles=('--'))
# Colorbar placed in a dedicated axes to the right of the plot
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
plt.colorbar(im, cax=cax)
ax.set_aspect('auto')

For the second test, we will test the steepest-descent approach using the same stepper.

In [None]:
# Same solver class as before, configured with beta_type="SD" for this test
NLSDsolver = NLCG(Stop, beta_type="SD")
Ros_prob = Rosenbrock_prblm(x_init, y_init)  # Resetting the problem
# Ask the solver to keep the objective values and the models it visits
NLSDsolver.setDefaults(save_obj=True, save_model=True)
NLSDsolver.run(Ros_prob, verbose=True)
# Collecting the coordinates of the saved models for plotting.
# Comprehensions are used so that no loop variable named `iter` is left in the
# notebook namespace shadowing the builtin.
x_smpld = [saved_model.getNdArray()[0] for saved_model in NLSDsolver.model]
y_smpld = [saved_model.getNdArray()[1] for saved_model in NLSDsolver.model]

Let's again plot the optimization path. In this case, the algorithm only gets close to the global minimum but does not reach it even after 1000 iterations. In the figure below, we can see that the algorithm is sampling most of the objective function within the parabolic valley.

In [None]:
fig, ax = plt.subplots()
# Optimization path: one marker per saved model, joined by a dashed line
ax.scatter(x_smpld, y_smpld, color='red', s=50, marker="+")
ax.plot(x_smpld, y_smpld, "--", color='red')
# Background image of the objective function. obj_ros_np.T has rows running along y
# (from 3.0 down to -1.5) and columns along x (from -1.5 up to 1.5); with the default
# 'upper' origin, element [0, 0] is drawn at (left, top), so the extent must be
# [-1.5, 1.5, -1.5, 3.0]. A reversed x extent followed by invert_xaxis() would draw
# the image mirrored in x with respect to the contours, the path and the tick labels.
im = ax.imshow(obj_ros_np.T, cmap='jet', vmin=0.0, vmax=600, extent=[-1.5, 1.5, -1.5, 3.0])
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.grid()
# Contour lines; here the extent is (x0, x1, y0, y1) with (x0, y0) the position of element [0, 0]
cs = ax.contour(obj_ros_np.T, levels=[0.05, 0.1, 0.5, 2, 10, 50, 125, 250, 500, 1000], extent=[-1.5, 1.5, 3.0, -1.5],
                colors="white", linewidths=(0.8,), linestyles=('--'))
# Colorbar placed in a dedicated axes to the right of the plot
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
plt.colorbar(im, cax=cax)
ax.set_aspect('auto')

In the third test, let's apply the BFGS algorithm to find the function's global minimum.

In [None]:
ParabStep = StepperPar()  # Again we use the same parabolic stepper as before
BFGSsolver = LBFGS(Stop, stepper=ParabStep)
Ros_prob = Rosenbrock_prblm(x_init, y_init)  # Resetting the problem
# Ask the solver to keep the objective values and the models it visits
BFGSsolver.setDefaults(save_obj=True, save_model=True)
BFGSsolver.run(Ros_prob, verbose=True)
# Collecting the coordinates of the saved models for plotting.
# Comprehensions are used so that no loop variable named `iter` is left in the
# notebook namespace shadowing the builtin.
x_smpld = [saved_model.getNdArray()[0] for saved_model in BFGSsolver.model]
y_smpld = [saved_model.getNdArray()[1] for saved_model in BFGSsolver.model]

The algorithm has precisely reached the global minimum in 24 iterations. We can clearly see that it is able to find an approximation of the local curvature of the objective function. In fact, it needs to sample very few points within the parabolic-shaped valley. 

In [None]:
fig, ax = plt.subplots()
# Optimization path: one marker per saved model, joined by a dashed line
ax.scatter(x_smpld, y_smpld, color='red', s=50, marker="+")
ax.plot(x_smpld, y_smpld, "--", color='red')
# Background image of the objective function. obj_ros_np.T has rows running along y
# (from 3.0 down to -1.5) and columns along x (from -1.5 up to 1.5); with the default
# 'upper' origin, element [0, 0] is drawn at (left, top), so the extent must be
# [-1.5, 1.5, -1.5, 3.0]. A reversed x extent followed by invert_xaxis() would draw
# the image mirrored in x with respect to the contours, the path and the tick labels.
im = ax.imshow(obj_ros_np.T, cmap='jet', vmin=0.0, vmax=600, extent=[-1.5, 1.5, -1.5, 3.0])
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.grid()
# Contour lines; here the extent is (x0, x1, y0, y1) with (x0, y0) the position of element [0, 0]
cs = ax.contour(obj_ros_np.T, levels=[0.05, 0.1, 0.5, 2, 10, 50, 125, 250, 500, 1000], extent=[-1.5, 1.5, 3.0, -1.5],
                colors="white", linewidths=(0.8,), linestyles=('--'))
# Colorbar placed in a dedicated axes to the right of the plot
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
plt.colorbar(im, cax=cax)
ax.set_aspect('auto')

Finally, let's test the BFGS method again, but this time employing the line-search algorithm proposed by Moré and Thuente (1994). Their line-search method uses a bracketing approach in which the strong Wolfe conditions are verified for the tested point. If these conditions are met, the line search is deemed successful.

In [None]:
# No stepper is passed here, so the solver falls back on its own default line search
BFGSsolver = LBFGS(Stop)
Ros_prob = Rosenbrock_prblm(x_init, y_init)  # Resetting the problem
# Ask the solver to keep the objective values and the models it visits
BFGSsolver.setDefaults(save_obj=True, save_model=True)
BFGSsolver.run(Ros_prob, verbose=True)
# Collecting the coordinates of the saved models for plotting.
# Comprehensions are used so that no loop variable named `iter` is left in the
# notebook namespace shadowing the builtin.
x_smpld = [saved_model.getNdArray()[0] for saved_model in BFGSsolver.model]
y_smpld = [saved_model.getNdArray()[1] for saved_model in BFGSsolver.model]

We can see that the algorithm has reached the global minimum in 36 iterations. However, since we employed a different stepping method, in which no parabolic interpolation is used during the optimization, the algorithm had to perform only 41 objective function evaluations, as opposed to the 73 required by the BFGS method when the parabolic stepper was the line-search algorithm of choice.

In [None]:
fig, ax = plt.subplots()
# Optimization path: one marker per saved model, joined by a dashed line
ax.scatter(x_smpld, y_smpld, color='red', s=50, marker="+")
ax.plot(x_smpld, y_smpld, "--", color='red')
# Background image of the objective function. obj_ros_np.T has rows running along y
# (from 3.0 down to -1.5) and columns along x (from -1.5 up to 1.5); with the default
# 'upper' origin, element [0, 0] is drawn at (left, top), so the extent must be
# [-1.5, 1.5, -1.5, 3.0]. A reversed x extent followed by invert_xaxis() would draw
# the image mirrored in x with respect to the contours, the path and the tick labels.
im = ax.imshow(obj_ros_np.T, cmap='jet', vmin=0.0, vmax=600, extent=[-1.5, 1.5, -1.5, 3.0])
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.grid()
# Contour lines; here the extent is (x0, x1, y0, y1) with (x0, y0) the position of element [0, 0]
cs = ax.contour(obj_ros_np.T, levels=[0.05, 0.1, 0.5, 2, 10, 50, 125, 250, 500, 1000], extent=[-1.5, 1.5, 3.0, -1.5],
                colors="white", linewidths=(0.8,), linestyles=('--'))
# Colorbar placed in a dedicated axes to the right of the plot
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
plt.colorbar(im, cax=cax)
ax.set_aspect('auto')