In [55]:
#I have recoded the newton and BFGS code in order to more explicitly take advantage of linesearch
# and to separate them from each other

function sup_norm(x)                    #easier to read later code with this defined
    return maximum(abs(x))
end

function test_armijo(f,g,new_f,alpha,p)
    c = 1e-4
    
    return new_f - f <= c*alpha*(g'*p)[1,1]        #satisfies Armijo condition?
end


function newtmin( obj, x0; maxIts=100, optTol=1e-6)
# Minimize a function f using Newton’s method.
# obj: a function that evaluates the objective value, gradient, and Hessian at a point x, i.e.,
#    (f, g, H) = obj(x)
# x0: starting point.
# maxIts (optional): maximum number of iterations.
# optTol (optional): optimality tolerance based on ||grad(x)|| <= optTol*||grad(x0)||

    its = 0;
    x = x0;
    
    (f,g,H) = obj(x)     #evaluate gradient, et al
      
    ngx0 = sup_norm(g)           #need to keep this value for stopping condition
    
    if(ngx0 > optTol^2)      #trap for low grad(x0)
        
        while(its < maxIts && sup_norm(g) > optTol*(ngx0))
            
            Hessian_modded = false
            
            p = -H \ g           #compute descent direction
            
            alpha = 1
            alpha_min = 0
            alpha_max = 1
            
            new_x = x + alpha*p  #trial step
            
            
            (new_f,new_g,_) = obj(new_x)
            
            armijo = test_armijo(f,g,new_f,alpha,p)
                
            while(!armijo)
                
                if(!Hessian_modded)
                    #force the Hessian to be positive definite
                    
                    Hessian_modded = true
                
                    (V, S) = eig(H)                    #decompose H
                
                    if(minimum(V) < 0)                 #if not positive definite
                    
                        #V[V .<= 0] = maximum(V)        #do not explore areas of negative curvature
                        
                        V = V + abs(1.01*minimum(V))
                    
                        V_inv = diagm(1 ./ V)
                    
                        p = -S*V_inv*S'*g              #recalculate p    
                        
                    end
                
                else    #Hessian was already fixed
             
                    alpha = alpha/2                #not enough decrease; try a smaller step
    
                end
                
                new_x = x + alpha*p                   #trial step
            
                (new_f,new_g,_) = obj(new_x)      #evaluate new gradient, et al
                
                
                armijo = test_armijo(f,g,new_f,alpha,p)
            
                if (alpha < 0.01)       #interval is too narrow
                    break        #break out of loop; just take a step and hope for the best
                end
            
            end        #terminate linesearch
            
            (_,_,H) = obj(new_x)
            
            x = new_x           #commit to the step
            f = new_f
            g = new_g
            
            Hessian_modded = false
            
            its = its + 1
            
        end
    
    else
        println("||grad(x0)|| is already < ", optTol^2)
    end
    
    return (x, its)
    
end

function test_wolfe(f,g,new_f,new_g,alpha, p)
    c1 = 1e-4
    c2 = 0.9                 #curvature condition constant
    
    armijo = new_f - f <= c1*alpha*(g'*p)[1,1]            #satisfies Armijo condition?
    curvature = abs(new_g'*p)[1,1] <= -c2*(g'*p)[1,1]     #satisfies curvature condition?
    downhill = (new_g'*p)[1,1] < 0
    
    return (armijo,curvature, downhill)
end

function BFGSmin( obj, x0; maxIts=100, optTol=1e-6)
# Minimize a function f using BFGS.
# obj: a function that evaluates the objective value and gradient at a point x, i.e.,
#    (f, g) = obj(x)
# x0: starting point.
# maxIts (optional): maximum number of iterations.
# optTol (optional): optimality tolerance based on ||grad(x)|| <= optTol*||grad(x0)||

    
    its = 0;
    x = x0;
    
    (f,g) = obj(x)
    n = size(g)[1]
    H_inv = eye(n)/norm(g)        #approximate Hessian as scaled identity
      
    ngx0 = sup_norm(g)           #need to keep this value for stopping condition
    
    if(ngx0 > optTol^2)      #trap for low grad(x0)
        
        while(its < maxIts && sup_norm(g) > optTol*(ngx0))
            
            p = -H_inv * g           #compute descent direction
                        
            alpha = 1
            alpha_min = 0
            alpha_max = 1
            
            new_x = x + alpha*p  #trial step
            
            
            (new_f,new_g) = obj(new_x)
            
            (armijo, curvature, downhill) = test_wolfe(f,g,new_f,new_g,alpha,p)
                
            while(!armijo || !curvature)
                
                if (!armijo || !downhill)
                    alpha_max = alpha                 #slow down, not enough decrease/uphill
                else
                    alpha_min = alpha                 #on a downhill, need to speed up
                end
                             
                alpha = 0.5*(alpha_max + alpha_min)   #try an intermediate value
                
                new_x = x + alpha*p                   #trial step
            
                (new_f,new_g) = obj(new_x)      #evaluate new gradient, et al
                
                
                (armijo, curvature, downhill) = test_wolfe(f,g,new_f,new_g,alpha,p)
            
                if (alpha_max - alpha_min < 0.01)       #interval is too narrow
                    break        #break out of loop; just take a step and hope for the best
                end
            
            end        #terminate linesearch
            
            
            #calculate Hessian
            y = new_g - g
            s = alpha*p
            ro = 1 / (y'*s)[1,1]
            H_inv = (eye(n) - ro*s*y')*H_inv*(eye(n) - ro*y*s') + ro*s*s'    
            
            x = new_x           #commit to the step
            f = new_f
            g = new_g
            
            its = its + 1
            
        end
    
    else
        println("||grad(x0)|| is already < ", optTol^2)
    end
    
    return (x, its)
    
end

BFGSmin (generic function with 1 method)

In [56]:
function AugLagrangeMin(f, g, H, C, Jt, x0;
    eta = n -> 1/n, maxIts = 100, maxK=100, maxP = 1e10, optTol=1e-6, BFGS = false)
        
    k = 1
    its = 0
    p = 1e-4
    x = x0
    y = 0*C(x)
    
    while((sup_norm(C(x)) > optTol || sup_norm(g(x)-Jt(x)*y) > optTol) && k < maxK && p <= maxP)
        
        if(BFGS)
            (x, new_its) = BFGSmin(x -> ( f(x) + p/2 * norm(C(x)-1/p*y)^2, g(x) + Jt(x)*(p*C(x)-y)), x;
                maxIts = maxIts, optTol=optTol)
        else
            (x, new_its) = newtmin(x -> ( f(x) + p/2 * norm(C(x)-1/p*y)^2,
                                          g(x) + Jt(x)*(p*C(x)-y),
                                          H(x)+p*Jt(x)*Jt(x)'), x;
                                         maxIts = maxIts, optTol=optTol)
        end
        
        its = its + new_its
        
        if(sup_norm(C(x)) <= eta(k))
            y = y - p*C(x)
        else
            p = 2*p
        end
        
        k = k + 1
    end

    return (x, k, its)
end

AugLagrangeMin (generic function with 1 method)

In [57]:
#Hock-Schittkowki 6
#
# min (1-x1)^2
# s/t 10(x2-x1)^2 = 0
#
# start: (-1.2,1)
# solution: (1,1) -> 0

f = x -> 1-x[1,1]^2
g = x -> [2*x[1,1]-2; 0]
H = x -> [2 0; 0 0]
C = x -> 10*(x[2,1]-x[1,1])^2
Jt = x -> [-20*(x[2,1]-x[1,1]); 20*(x[2,1]-x[1,1])]
x0 = [-1.2, 0]

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0)

println("Newton solution: ", x, " -> ", f(x), " with ", its, " iterations")

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0, BFGS = true)

println("BFGS solution: ", x, " -> ", f(x), " with ", its, " iterations")

#here Newton vastly outperforms the BFGS algorithm

Newton solution: [1.0,1.0] -> 0.0 with 18 iterations
BFGS solution: [1.0000000703774097,1.0001098935275672] -> -1.4075482424047436e-7 with 400 iterations


In [58]:
#Hock-Schittkowki 7
#
# min ln(1+x1^2)-x2
# s/t (1 + x1^2)^2 + x2^2 - 4 = 0
#
# start: (2,2)
# solution: (0,1.73205) -> -1.73205

f = x -> log(1+x[1,1]^2)-x[2,1]
g = x -> [2*x[1,1]/(1+x[1,1]^2); -1]
H = x -> [(2-2*x[1,1]^2)/(1+x[1,1]^2)^2 0; 0 0]
C = x -> (1+x[1,1]^2)^2+x[2,1]^2-4
Jt = x -> [4x[1,1]*(1+x[1,1]^2),2*x[2,1]]
x0 = [2, 2]

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0)

println("Newton solution: ", x, " -> ", f(x), " with ", its, " iterations")

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0, BFGS = true)

println("BFGS solution: ", x, " -> ", f(x), " with ", its, " iterations")

#Here Newton is not able to produce a satisfactory answer.  In fact, due to the logarithm,
#you can see from the Hessian that as x1 -> 1, the Hessian is no longer positive semidefinite.

Newton solution: [4316.245751202957,8.06905734337049e8] -> -8.069057175967665e8 with 4700 iterations
BFGS solution: [-3.4597465383013144e-16,1.7320508124038025] -> -1.7320508124038025 with 127 iterations


In [59]:
#Hock-Schittkowki 8
#
# min -1
# s/t x1^2+x2^2 - 25 = 0
#     x1x2 - 9 = 0
#
# start: (2,1)
# solution: x = (4.60159, 1.95585), -x, and their symmetric reflections -> -1

f = x -> -1
g = x -> [0;0]
H = x -> [0 0;0 0]
C = x -> [x[1,1]^2+x[2,1]^2 - 25; x[1,1]*x[2,1]-9]
Jt = x -> [2x[1,1] x[2,1]; 2x[2,1] x[1,1]]
x0 = [2, 1]

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0)

println("Newton solution: ", x, " -> ", f(x), " with ", its, " iterations")

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0, BFGS = true)

println("BFGS solution: ", x, " -> ", f(x), " with ", its, " iterations")

#Here again Newton vastly outperforms the slower BFGS algorithm for this problem with quadratic constraints

Newton solution: [4.601594918576383,1.9558436075096002] -> -1 with 4 iterations
BFGS solution: [4.601594811360296,1.9558437338668564] -> -1 with 212 iterations


In [60]:
#Hock-Schittkowki 9
#
# min sin(pi*x1/12)cos(pi*x2/16)
# s/t 4x1-3x2=0
#
# start: (0,0)
# solution: (12k-3,16k-4) -> -1/2, k in Z

f = x -> sin(pi*x[1,1]/12)*cos(pi*x[2,1]/16)
g = x -> [pi/12*cos(pi*x[1,1]/12)*cos(pi*x[2,1]/16); -pi/16*sin(pi*x[1,1]/12)*sin(pi*x[2,1]/16)]
H = x -> [-pi^2/12^2*sin(pi*x[1,1]/12)*cos(pi*x[2,1]/16) -pi^2/(12*16)*cos(pi*x[1,1]/12)*sin(pi*x[2,1]/16);
          -pi^2/(12*16)*cos(pi*x[1,1]/12)*sin(pi*x[2,1]/16) -pi^2/16^2*sin(pi*x[1,1]/12)*cos(pi*x[2,1]/16)]
C = x -> 4x[1,1]-3x[2,1]
Jt = x -> [4; -3]
x0 = [0, 0]

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0)

println("Newton solution: ", x, " -> ", f(x), " with ", its, " iterations")

(x, k, its) = AugLagrangeMin(f,g,H,C,Jt,x0, BFGS = true)

println("BFGS solution: ", x, " -> ", f(x), " with ", its, " iterations")

#For this very nonconvex problem, Newton again has difficulty, and does not even converge
#but the BFGS algorithm efficiently arrives at the answer

Newton solution: [1.3582536483894607e18,1.811004864519281e18] -> -0.29094937310538416 with 9900 iterations
BFGS solution: [-3.0000001405311156,-4.000000141829025] -> -0.500000004471447 with 100 iterations
