# Practising Optimization on Rosenbrock function with 2 and n variables 

### with minimize in scipy.optimize

1 - lambdify to value a derivative at a point x

2 - calculating Gradient Vector and Hessian Matrix of a 2 and n Variable function

3 - check at "minimized point", both function and Gradient are ZERO! 

4 - check at "minimized point", the Hessiam Matrix is positive definite (and semi - positive definite)

In [1]:
import numpy as np
from scipy.optimize import minimize
from sympy import *


In [2]:
x = Symbol('x')
y = Symbol('y')
z = x**2 + 1
zprime = z.diff(x)
print (zprime)

2*x


In [3]:
f   = lambdify(x, zprime, 'numpy')
print (f(10)) # value the derivative at x = 10
ccv = f(np.ones(5)) # value the derivative at x = 1, for 5 times
print (ccv)
ccv[0] = 105
ccv[-1] = 10
print (ccv)
print (ccv[0])
'''print ccv[-1]
print ccv[1:-1] # dal secondo al penultimo
print ccv[1:]   # dal secondo all ultimo
print ccv[:-1]  # dal primo al penultimo'''

20
[2. 2. 2. 2. 2.]
[105.   2.   2.   2.  10.]
105.0


'print ccv[-1]\nprint ccv[1:-1] # dal secondo al penultimo\nprint ccv[1:]   # dal secondo all ultimo\nprint ccv[:-1]  # dal primo al penultimo'

In [4]:

x2 = Symbol('x2')
x1 = Symbol('x1')

# 2 variable function f(x1, x2)
f  = 100*(x2 - x1**2)**2 + (1-x1)**2

# 1st derivatives - Gradient Vector
fprimex1 = f.diff(x1)
fprimex2 = f.diff(x2)

# 2nd derivatives  - Hessiam Matrix
fprimex11 = fprimex1.diff(x1)
fprimex22 = fprimex2.diff(x2)
fprimex12 = fprimex1.diff(x2)
fprimex21 = fprimex2.diff(x1)

print (fprimex1)
print (fprimex2)
print (fprimex11)
print (fprimex12)
print (fprimex22)
print (fprimex21)

-400*x1*(-x1**2 + x2) + 2*x1 - 2
-200*x1**2 + 200*x2
1200*x1**2 - 400*x2 + 2
-400*x1
200
-400*x1


In [5]:
def rosen(x):
    """The Rosenbrock function"""    
    x1 = x[0]
    x2 = x[1]
    return 100.0*(x2-x1**2.0)**2.0 + (1-x1)**2.0

In [6]:
x0 = np.array([1.3, 0.7])
res = minimize(rosen, x0, method='nelder-mead',options={'xtol': 1e-8, 'disp': True})

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 79
         Function evaluations: 150


In [7]:
res

 final_simplex: (array([[1.        , 1.        ],
       [1.        , 0.99999999],
       [1.        , 1.        ]]), array([3.37360776e-18, 9.41642436e-18, 2.25070018e-17]))
           fun: 3.3736077629532093e-18
       message: 'Optimization terminated successfully.'
          nfev: 150
           nit: 79
        status: 0
       success: True
             x: array([1., 1.])

In [8]:
print(res.x)
print(res.fun)

[1. 1.]
3.3736077629532093e-18


In [9]:
x0 = np.array([0, 0])
res = minimize(rosen, x0, method='nelder-mead',options={'xtol': 1e-8, 'disp': True})
print(res.x) 

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 110
         Function evaluations: 206
[1. 1.]


In [10]:
print(res.fun)
print (rosen(res.x))

2.650458998741964e-18
2.650458998741964e-18


In [11]:
v2 = np.array([0.9999564, 0.9999085])
print (rosen(v2))
v2 = np.array([1., 1.])
print (rosen(v2))

3.751595186883519e-09
0.0


In [12]:
# this is the Gradient vector 
def rosen_der(x):
    der = np.zeros_like(x)
    der[0] = -400*x[0]*(x[1] -x[0]**2 ) + 2*x[0] - 2
    der[1] = 200*(x[1] -x[0]**2)
    return der

In [13]:
res2 = minimize(rosen, x0, method='BFGS', jac=rosen_der, options={'disp': True})

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 19
         Function evaluations: 24
         Gradient evaluations: 24


In [14]:
print(res2.x)
print(res2.fun)

[0.99999913 0.99999825]
7.71728835853869e-13


In [15]:
# this is the Hessian Matrix 
def rosen_hess(x):
    H = np.array([2, 2])   
    H00 =  1200*x[0]**2 - 400*x[1] + 2
    H11 =  200
    H01 =  -400*x[0]
    H10 =  -400*x[0]         
    H = [[H00, H01],[H10, H11]]    
    return H

In [16]:
# function, Gradient and Hessian at x = x0
print (rosen(x0))
print (rosen_der(x0))
print (rosen_hess(x0))

print (" -------------------------------- ")
# function, jacobian and Hessian at x = optimized point
# note 
# the function is at the "loacal" minimum
# the gradient vector is zero
print (rosen(res2.x))
print (rosen_der(res2.x))
print (rosen_hess(res2.x))

1.0
[-2  0]
[[2, 0], [0, 200]]
 -------------------------------- 
7.71728835853869e-13
[ 3.92841201e-06 -2.83120873e-06]
[[801.9986184625773, -399.9996531998896], [-399.9996531998896, 200]]


In [17]:
res3 = minimize(rosen, x0, method='Newton-CG',jac=rosen_der, hess=rosen_hess,options={'xtol': 1e-8, 'disp': True})

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 35
         Function evaluations: 55
         Gradient evaluations: 89
         Hessian evaluations: 35


In [18]:
print (res3.x)

[1. 1.]


In [19]:
def rosen_Vector(x):
    """The Rosenbrock function"""
    return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

In [20]:
def rosen_FirstDerivativeVector(x):
    """The Rosenbrock function derivative"""
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = np.zeros_like(x)
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der

In [21]:
def rosen_SecondHessianMatrix(x):
    x = np.asarray(x)
    H = np.diag(-400*x[:-1],1) - np.diag(400*x[:-1],-1)
    diagonal     = np.zeros_like(x)
    diagonal[0]  = 1200*x[0]**2-400*x[1]+2
    diagonal[-1] = 200
    diagonal[1:-1] = 202 + 1200*x[1:-1]**2 - 400*x[2:]
    H = H + np.diag(diagonal)
    return H

In [22]:
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
print (x0)

[1.3 0.7 0.8 1.9 1.2]


In [23]:
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
res = minimize(rosen_Vector, x0, method='nelder-mead',options={'xtol': 1e-8, 'disp': True})

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 339
         Function evaluations: 571


In [24]:
print (res.x)

[1. 1. 1. 1. 1.]


In [25]:
print (rosen_Vector(x0))
print (rosen_FirstDerivativeVector(x0))
print (rosen_SecondHessianMatrix(x0))

print (" ------------------------------------------------ ")
print (rosen_Vector(res.x))
print (rosen_FirstDerivativeVector(res.x)) # the gradient vector is zero!
print (rosen_SecondHessianMatrix(res.x))

848.22
[ 515.4 -285.4 -341.6 2085.4 -482. ]
[[1750. -520.    0.    0.    0.]
 [-520.  470. -280.    0.    0.]
 [   0. -280.  210. -320.    0.]
 [   0.    0. -320. 4054. -760.]
 [   0.    0.    0. -760.  200.]]
 ------------------------------------------------ 
4.861153433422115e-17
[ 4.99059794e-08 -6.09594775e-08  2.66108494e-07 -8.84030519e-08
 -1.45017776e-08]
[[ 802.0000007  -400.00000016    0.            0.            0.        ]
 [-400.00000016 1002.00000107 -400.00000028    0.            0.        ]
 [   0.         -400.00000028 1002.00000262 -400.00000059    0.        ]
 [   0.            0.         -400.00000059 1002.0000038  -400.00000094]
 [   0.            0.            0.         -400.00000094  200.        ]]


In [26]:
res2 = minimize(rosen_Vector, x0, method='BFGS', jac=rosen_FirstDerivativeVector, options={'disp': True})
print (res2)

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 25
         Function evaluations: 30
         Gradient evaluations: 30
      fun: 4.0130879949972905e-13
 hess_inv: array([[0.00758796, 0.01243893, 0.02344025, 0.04614953, 0.09222281],
       [0.01243893, 0.02481725, 0.04712952, 0.09298607, 0.18569385],
       [0.02344025, 0.04712952, 0.09456412, 0.18674836, 0.37282072],
       [0.04614953, 0.09298607, 0.18674836, 0.37383212, 0.74621435],
       [0.09222281, 0.18569385, 0.37282072, 0.74621435, 1.49444705]])
      jac: array([-5.68982937e-06, -2.73296557e-06, -2.54520599e-06, -7.73460770e-06,
        5.78142698e-06])
  message: 'Optimization terminated successfully.'
     nfev: 30
      nit: 25
     njev: 30
   status: 0
  success: True
        x: array([1.00000004, 1.0000001 , 1.00000021, 1.00000044, 1.00000092])


In [27]:
print (res2.x)
print (" ------------------------------------------------ ")
print (rosen_Vector(res2.x))
print (rosen_FirstDerivativeVector(res2.x))   # the Gradient vector matches the one from minimize output!

from numpy.linalg import inv
print (rosen_SecondHessianMatrix(res2.x))  
print (inv(rosen_SecondHessianMatrix(res2.x)))# the Hassian Inverse is the same (or almost) 

#as the one from minimize output!
print (" ------------------------------------------------ ")
print (res2.hess_inv)

[1.00000004 1.0000001  1.00000021 1.00000044 1.00000092]
 ------------------------------------------------ 
4.0130879949972905e-13
[-5.68982937e-06 -2.73296557e-06 -2.54520599e-06 -7.73460770e-06
  5.78142698e-06]
[[ 802.00006255 -400.00001708    0.            0.            0.        ]
 [-400.00001708 1002.00015393 -400.00003994    0.            0.        ]
 [   0.         -400.00003994 1002.00033689 -400.00008569    0.        ]
 [   0.            0.         -400.00008569 1002.00069752 -400.00017727]
 [   0.            0.            0.         -400.00017727  200.        ]]
[[0.00741848 0.01237406 0.02357854 0.0466902  0.09338044]
 [0.01237406 0.02481    0.04727498 0.09361385 0.18722779]
 [0.02357854 0.04727498 0.0948453  0.18781252 0.3756252 ]
 [0.0466902  0.09361385 0.18781252 0.37685657 0.75371348]
 [0.09338044 0.18722779 0.3756252  0.75371348 1.51242763]]
 ------------------------------------------------ 
[[0.00758796 0.01243893 0.02344025 0.04614953 0.09222281]
 [0.01243893 0.02481

In [28]:
# Positive definite (PD) matrices.
# Cholesky decomposition is a good option if you're working with positive definite (PD) matrices.
# Good test for positive definiteness (actually the standard one !) is to try to compute its Cholesky factorization. 
# It succeeds iff your matrix is positive definite.
# However, it throws the following error on positive semi-definite (PSD) matrix

# Positive definite (PD) matrices.
print (np.linalg.cholesky(res2.hess_inv))
print (" ------------------------------------------- ")
print (np.linalg.cholesky(rosen_SecondHessianMatrix(res2.x)))

[[0.08710889 0.         0.         0.         0.        ]
 [0.14279747 0.06652918 0.         0.         0.        ]
 [0.26909133 0.13082924 0.07097662 0.         0.        ]
 [0.52979131 0.26053555 0.14230657 0.07087568 0.        ]
 [1.05870723 0.51876688 0.28265517 0.1402394  0.07004248]]
 ------------------------------------------- 
[[ 28.31960562   0.           0.           0.           0.        ]
 [-14.12449108  28.32841163   0.           0.           0.        ]
 [  0.         -14.12010123  28.33060321   0.           0.        ]
 [  0.           0.         -14.11901055  28.33115314   0.        ]
 [  0.           0.           0.         -14.11873972   0.81313508]]


In [29]:
import scipy

# Positive Semi definite (PdD) matrices.
# For PSD matrices, you can use scipy/numpy's eigh() to check that all eigenvalues are non-negative.
E,V = scipy.linalg.eigh(res2.hess_inv)
print (np.around(E, decimals=4, out=None))

E,V = scipy.linalg.eigh(rosen_SecondHessianMatrix(res2.x))
print (np.around(E, decimals=4, out=None))

[6.0000e-04 8.0000e-04 1.4000e-03 3.1000e-03 1.9894e+00]
[4.9730000e-01 3.5463040e+02 7.5459480e+02 1.2490982e+03 1.6491805e+03]


In [30]:
res3 = minimize(rosen_Vector, x0, method='Newton-CG', jac=rosen_FirstDerivativeVector, 
                hess=rosen_SecondHessianMatrix, options={'disp': True})
print (res3.x)
print (res3)

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 21
         Function evaluations: 30
         Gradient evaluations: 50
         Hessian evaluations: 21
[0.9999852  0.9999705  0.99994098 0.99988173 0.99976293]
     fun: 1.859665471986481e-08
     jac: array([ 5.86892970e-05,  1.54121807e-04,  6.93311785e-04,  3.04308913e-03,
       -1.87051318e-03])
 message: 'Optimization terminated successfully.'
    nfev: 30
    nhev: 21
     nit: 21
    njev: 50
  status: 0
 success: True
       x: array([0.9999852 , 0.9999705 , 0.99994098, 0.99988173, 0.99976293])


In [31]:
print (rosen_FirstDerivativeVector(res3.x))
print( rosen_SecondHessianMatrix(res3.x))

[-6.81376243e-05 -3.03701999e-05 -3.48177529e-05 -5.76854082e-05
 -1.11414771e-04]
[[ 801.97628364 -399.9940805     0.            0.            0.        ]
 [-399.9940805  1001.95280859 -399.98819963    0.            0.        ]
 [   0.         -399.98819963 1001.90565171 -399.97639025    0.        ]
 [   0.            0.         -399.97639025 1001.81100993 -399.95269398]
 [   0.            0.            0.         -399.95269398  200.        ]]


In [32]:
print (rosen_Vector(res2.x))
print (rosen_Vector(res3.x))

4.0130879949972905e-13
1.859665471986481e-08
