# Tutorial 2

# Imported modules

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as LA
from scipy import optimize
import sys

In [None]:
def output_banner():
    """Print the column headings of the per-iteration progress table."""
    header = ' Iter   Nfev     Step       Objective    Norm of g'
    print(header)


In [None]:
def output_iteration_info(k, nf, t, f, g):
    """Print one row of the progress table.

    The row holds the iteration counter ``k``, the number of objective
    evaluations ``nf``, the step length ``t``, the objective value ``f`` and
    the infinity norm of the gradient ``g``.
    """
    grad_norm = LA.norm(g, np.inf)
    row = f'{k:5d} {nf:6d} {t:10e} {f:10e} {grad_norm:10e}'
    print(row)

In [None]:
def output_final_results(x, f, g, nf, ng, nh, k):
    """Print a summary of a finished optimisation run.

    Shows the final point ``x``, objective value ``f``, gradient ``g`` and its
    infinity norm, the evaluation counters ``nf``/``ng``/``nh`` and the
    iteration count ``k``, one labelled line each.
    """
    summary = (
        ('          x:', x),
        ('        fun:', f),
        ('        jac:', g),
        ('norm of jac:', LA.norm(g, np.inf)),
        ('       nfev:', nf),
        ('       ngev:', ng),
        ('       nhev:', nh),
        ('        nit:', k),
    )
    # Blank lines separate the summary from the iteration table.
    print('\n')
    for label, value in summary:
        print(label, value)


# Example 1: The Rosenbrock function

## Objective function

In [None]:
def objective(x):
    """Evaluate the two-variable Rosenbrock function at ``x``.

    ``x`` is indexed as ``x[0]`` and ``x[1]``; the result is
    ``100*(x[1] - x[0]**2)**2 + (1 - x[0])**2``.
    """
    u, v = x[0], x[1]
    valley = v - u**2
    offset = 1 - u
    return 100*valley**2 + offset**2

In [None]:
# Sample the square [-5, 5] x [-5, 5] on a 1000 x 1000 grid.
x = np.linspace(-5, 5, 1000)
y = np.linspace(-5, 5, 1000)
X, Y = np.meshgrid(x, y)
# Stack the flattened grids into a 2 x N array so that objective() evaluates
# every grid point in one call, then restore the 1000 x 1000 grid shape.
Z = objective(np.vstack([X.ravel(), Y.ravel()])).reshape((1000,1000))
# Contour levels are k**5 for k = 0..9, so they spread out quickly with height.
plt.contour(X, Y, Z, np.arange(10)**5, cmap='RdGy')
plt.colorbar();
# Mark the point (1, 1), where the objective evaluates to zero, with a red 'x'.
plt.text(1, 1, 'x', va='center', ha='center', color='red', fontsize=20);

### The gradient

In [None]:
def gradient(x):
    """Return the gradient of the two-variable Rosenbrock function at ``x``.

    The result is a NumPy array holding the partial derivatives with respect
    to ``x[0]`` and ``x[1]``, in that order.
    """
    u, v = x[0], x[1]
    d_du = 400 * (u**2 - v) * u + 2*(u - 1)
    d_dv = 200 * (v - u**2)
    return np.array([d_du, d_dv])

### The Hessian

In [None]:
def hessian(x):
    """Return the 2 x 2 Hessian of the two-variable Rosenbrock function at ``x``."""
    u, v = x[0], x[1]
    h_uu = 2 - 400 * (v - 3 * u**2)
    h_uv = -400 * u  # mixed partial; the matrix is symmetric
    h_vv = 200
    return np.array([
        [h_uu, h_uv],
        [h_uv, h_vv],
    ])

# A first approach (See Tutorial 1)

In [None]:
def steepest_descent_AllInOne(objective, gradient, x0):
    """Minimise ``objective`` by steepest descent with an inlined Armijo backtracking search.

    Parameters
    ----------
    objective : callable
        Maps a point to the scalar objective value.
    gradient : callable
        Maps a point to the gradient as a NumPy array.
    x0 : sequence of float
        Starting point.

    Returns
    -------
    (x, f, g) : the final point, its objective value and its gradient, or
    ``None`` when the backtracking search cannot find an acceptable step
    (a message is printed in that case).

    The loop stops when the infinity norm of the gradient drops to 1e-6 or
    after 20000 iterations. A progress row is printed every 100 iterations.
    """

    maxiter = 20000
    dxmax = 1             # the first trial step moves at most this far (inf-norm)
    c1 = .0001            # Armijo sufficient-decrease constant
    beta = .5             # step contraction factor
    max_backtracks = 15

    x = np.asarray(x0, dtype=float)
    f = objective(x); nf = 1
    g = gradient(x); ng = 1

    k = 0

    output_banner()
    while ((LA.norm(g, np.inf) > 1e-6) and (k < maxiter)):
        d = -g
        slope = np.inner(g, d)
        t = min(1, dxmax/LA.norm(g, np.inf))
        xnew = x + t * d
        fnew = objective(xnew); nf += 1

        backtracks = 0
        while ((fnew > f + t * c1 * slope) and (backtracks < max_backtracks)):
            t = t * beta
            xnew = x + t * d
            fnew = objective(xnew); nf += 1
            backtracks += 1

        # Test the condition itself rather than the counter, so a step that
        # becomes acceptable on the very last backtrack is not rejected.
        if fnew > f + t * c1 * slope:
            print('Armijo failed to make progress')
            return

        if (k%100 == 1):
            # Reports the objective and gradient at the current (pre-step) point.
            output_iteration_info(k, nf, t, f, g)

        x = xnew
        f = fnew
        g = gradient(x); ng += 1
        k += 1

    output_final_results(x, f, g, nf, ng, 0, k);
    return x, f, g


In [None]:
# Run the all-in-one steepest descent on the Rosenbrock function from (-1.2, -1.2).
# The trailing semicolon keeps the notebook from echoing the returned value.
steepest_descent_AllInOne(objective,gradient,[-1.2, -1.2]);

# Algorithms

## Linesearches

### Armijo

In [None]:
def armijo(obj, grad, x0, f0, g0, t0, d, nf, ng):
    """Backtracking line search enforcing the Armijo sufficient-decrease condition.

    Starting from ``t0``, the step ``t`` is halved until
    ``obj(x0 + t*d) < f0 + c1 * t * <g0, d>`` with ``c1 = 1e-4``, for at most
    20 trial steps.

    Parameters
    ----------
    obj, grad : callable
        Objective function and its gradient.
    x0 : array_like
        Current point.
    f0, g0 : float, array_like
        Objective value and gradient at ``x0``.
    t0 : float
        Initial trial step length.
    d : array_like
        Search direction; must satisfy ``<g0, d> < 0``.
    nf, ng : int
        Running counts of objective and gradient evaluations.

    Returns
    -------
    (x, f, g, t, nf, ng) : the accepted point, its objective value and
    gradient, the accepted step length and the updated evaluation counts.

    Raises
    ------
    ValueError
        If ``d`` is not a descent direction.
    RuntimeError
        If no acceptable step is found within the trial budget.
    """

    c1 = 1e-4
    iterMax = 20

    x0 = np.asarray(x0)
    d = np.asarray(d)
    gtd0 = np.inner(g0, d)

    if gtd0 >= 0:
        raise ValueError('ARMIJO: Direction provided is not a descent direction.')

    t = t0
    for k in range(iterMax):
        x = x0 + t*d
        f = obj(x)
        if f < f0 + c1*t*gtd0:
            # k + 1 objective evaluations were spent; the gradient is only
            # evaluated once, at the accepted point.
            return x, f, grad(x), t, nf + k + 1, ng + 1
        t = t/2

    raise RuntimeError("ARMIJO: Maximum Iterations exceeded.")

### Wolfe

In [None]:
def wolfe(obj, grad, x0, f0, g0, t0, d, nf, ng):
    """Bracketing/bisection line search for the (weak) Wolfe conditions.

    A step ``t`` is accepted when both hold, with ``c1 = 1e-4`` and
    ``c2 = 0.9``:

    * sufficient decrease: ``obj(x0 + t*d) <= f0 + c1 * t * <g0, d>``
    * curvature:           ``<grad(x0 + t*d), d> >= c2 * <g0, d>``

    The interval ``[lo, hi]`` brackets acceptable steps. A step that violates
    sufficient decrease becomes the new upper bound; one that violates the
    curvature condition becomes the new lower bound. The next trial is the
    midpoint, or twice the step while no upper bound is known. At most 20
    trial steps are made.

    Parameters
    ----------
    obj, grad : callable
        Objective function and its gradient.
    x0 : array_like
        Current point.
    f0, g0 : float, array_like
        Objective value and gradient at ``x0``.
    t0 : float
        Initial trial step length.
    d : array_like
        Search direction; must satisfy ``<g0, d> < 0``.
    nf, ng : int
        Running counts of objective and gradient evaluations.

    Returns
    -------
    (x, f, g, t, nf, ng) : the accepted point, its objective value and
    gradient, the accepted step length and the updated evaluation counts.

    Raises
    ------
    ValueError
        If ``d`` is not a descent direction.
    RuntimeError
        If no acceptable step is found within the trial budget.
    """

    c1 = 1e-4
    c2 = 0.90
    iterMax = 20

    lo = 0
    hi = np.inf

    x0 = np.asarray(x0)
    d = np.asarray(d)
    gtd0 = np.inner(g0, d)

    if gtd0 >= 0:
        raise ValueError('WOLFE: Direction not a descent direction.')

    t = t0
    for _ in range(iterMax):
        x = x0 + t*d
        f = obj(x); nf += 1
        if f > f0 + c1*t*gtd0:
            # Step too long: shrink the bracket from above.
            hi = t
            t = (lo + hi)/2
            continue

        # Evaluate the gradient exactly once per trial point and count it.
        g = grad(x); ng += 1
        if np.inner(g, d) < c2*gtd0:
            # Step too short: raise the lower bound, expanding while the
            # bracket is still unbounded.
            lo = t
            t = 2*t if hi == np.inf else (lo + hi)/2
        else:
            return x, f, g, t, nf, ng

    raise RuntimeError("WOLFE: Maximum Iterations exceeded.")

# Descent directions

## Steepest descent

In [None]:
def steepest_descent(objective, gradient, linesearch, x0):
    """Minimise ``objective`` by steepest descent with a pluggable line search.

    Parameters
    ----------
    objective : callable
        Maps a point to the scalar objective value.
    gradient : callable
        Maps a point to the gradient as a NumPy array.
    linesearch : callable
        Line-search routine called as
        ``linesearch(objective, gradient, x, f, g, t0, d, nf, ng)`` and
        returning ``(x, f, g, t, nf, ng)``.
    x0 : sequence of float
        Starting point.

    Returns
    -------
    (x, f, g, nf, ng, k) : final point, objective value, gradient, numbers of
    objective and gradient evaluations, and iteration count.

    The loop stops when the infinity norm of the gradient drops to 1e-6 or
    after 20000 iterations. A progress row is printed every 100 iterations.
    """

    maxiter = 20000
    dxmax = 1   # the first trial step moves at most this far (inf-norm)

    x = np.asarray(x0, dtype=float)
    f = objective(x); nf = 1
    g = gradient(x); ng = 1

    k = 0

    output_banner()
    while ((LA.norm(g, np.inf) > 1e-6) and (k < maxiter)):
        # Cap the initial trial step so that a large gradient does not throw
        # the first trial point far away.
        t = min(1, dxmax/LA.norm(g, np.inf))
        d = -g
        # Use the line search supplied by the caller, starting from the capped step.
        x, f, g, t, nf, ng = linesearch(objective, gradient, x, f, g, t, d, nf, ng)
        k += 1
        if (k%100 == 1): output_iteration_info(k, nf, t, f, g)

    output_final_results(x, f, g, nf, ng, 0, k);
    return x, f, g, nf, ng, k;

In [None]:
# Steepest descent on the Rosenbrock function from (-1.2, 1); `wolfe` is passed
# as the line-search argument.
steepest_descent(objective,gradient, wolfe, [-1.2, 1]);

## Newton's method

In [None]:
def newton(objective, gradient, hessian, x0):
    """Minimise ``objective`` with Newton's method and Armijo backtracking.

    Each iteration solves ``hessian(x) @ d = -gradient(x)`` for the direction
    ``d`` and backtracks from the full step ``t = 1`` until the
    sufficient-decrease condition holds.

    Parameters
    ----------
    objective, gradient, hessian : callable
        Objective function, its gradient and its Hessian matrix.
    x0 : sequence of float
        Starting point.

    Returns
    -------
    (x, f, g, nf, ng, nh, k) : final point, objective value, gradient, numbers
    of objective/gradient/Hessian evaluations and iteration count, or ``None``
    when backtracking cannot find an acceptable step (a message is printed).
    That happens in particular when the Newton direction is not a descent
    direction.

    The loop stops when the infinity norm of the gradient drops to 1e-10 or
    after 100 iterations.
    """

    maxiter = 100
    sigma = 1e-4          # Armijo sufficient-decrease constant
    beta = .5             # step contraction factor
    max_backtracks = 15

    x = np.asarray(x0, dtype=float)
    f = objective(x); nf = 1
    g = gradient(x); ng = 1
    h = hessian(x); nh = 1

    k = 0

    output_banner()

    # Strict bound so that at most `maxiter` iterations run, as in the other solvers.
    while ((LA.norm(g, np.inf) > 1e-10) and (k < maxiter)):
        d = - LA.solve(h,g)
        slope = np.inner(g, d)
        t = 1
        xnew = x + t * d
        fnew = objective(xnew); nf += 1

        backtracks = 0
        while ((fnew > f + t * sigma * slope) and (backtracks < max_backtracks)):
            t = t * beta
            xnew = x + t * d
            fnew = objective(xnew); nf += 1
            backtracks += 1

        # Test the condition itself rather than the counter, so a step that
        # becomes acceptable on the very last backtrack is not rejected.
        if fnew > f + t * sigma * slope:
            print('Armijo failed to make progress')
            return

        x = xnew
        f = fnew
        g = gradient(x); ng += 1
        h = hessian(x); nh += 1
        k += 1
        output_iteration_info(k, nf, t, f, g)

    output_final_results(x, f, g, nf, ng, nh, k)
    return x, f, g, nf, ng, nh, k


In [None]:
# Run the hand-written Newton method on the Rosenbrock function from (-1.2, 1).
newton(objective,gradient,hessian,[-1.2, 1]);

In [None]:
# Reference run: SciPy's Newton-CG on the same problem and starting point,
# supplied with the analytic gradient and Hessian.
optimize.minimize(objective, [-1.2, 1], method="Newton-CG", jac=gradient, hess=hessian)

## BFGS

In [None]:
def BFGSWolfe(objective, gradient, x0):
    """Minimise ``objective`` with BFGS using the ``wolfe`` line search.

    ``H`` approximates the inverse Hessian, starting from the identity, and is
    updated after every step with the BFGS formula
    ``H <- (I - r s y^T) H (I - r y s^T) + r s s^T`` where ``s`` is the step,
    ``y`` the change in gradient and ``r = 1 / (y . s)``.

    Parameters
    ----------
    objective, gradient : callable
        Objective function and its gradient.
    x0 : sequence of float
        Starting point.

    Returns
    -------
    (x, f, g, nf, ng, 0, k) : final point, objective value, gradient, numbers
    of objective and gradient evaluations, a zero Hessian-evaluation count and
    the iteration count.

    The loop stops when the infinity norm of the gradient drops to 1e-6 or
    after 500 iterations.
    """

    maxIter = 500
    eps = 1e-6

    x = np.asarray(x0, dtype=float)

    f = objective(x); nf = 1
    g = gradient(x); ng = 1

    I = np.eye(len(x))
    H = I

    output_banner()

    # Count completed iterations from zero so the reported `nit` is exact.
    k = 0

    while ((LA.norm(g, np.inf) > eps) and (k < maxIter)):
        d = - np.dot(H, g)
        xnew, fnew, gnew, t, nf, ng = wolfe(objective, gradient, x, f, g, 1, d, nf, ng)
        s = xnew - x
        y = gnew - g
        ys = np.dot(y, s)
        # The update keeps H positive definite only when y . s > 0; otherwise
        # keep the previous H rather than divide by a non-positive number.
        if ys > 0:
            r = 1/ys
            H = np.dot((I - r * np.outer(s,y)), np.dot(H, (I - r * np.outer(y,s)))) + r * np.outer(s,s)
        x = xnew
        f = fnew
        g = gnew
        k += 1
        output_iteration_info(k, nf, t, f, g)

    output_final_results(x, f, g, nf, ng, 0, k)
    return x, f, g, nf, ng, 0, k

In [None]:
# Run the hand-written BFGS method on the Rosenbrock function from (50, 50).
BFGSWolfe(objective,gradient,[50, 50]);

In [None]:
# Reference run: SciPy's BFGS on the same problem and starting point,
# supplied with the analytic gradient.
optimize.minimize(objective, [50, 50], method="BFGS", jac=gradient)

## Conjugate gradient

In [None]:
# SciPy's nonlinear conjugate-gradient method from (12, 12), supplied with the
# analytic gradient.
optimize.minimize(objective, [12, 12], method="CG", jac=gradient)

# A second example

## Objective function

In [None]:
def f(x):
    """Evaluate the second test function at ``x``.

    The value is ``x[0]**4/4 - x[0]**2 + 2*x[0] + (x[1] - 1)**2``.
    """
    u, v = x[0], x[1]
    quartic_part = u**4/4 - u**2 + 2*u
    quadratic_part = (v - 1)**2
    return quartic_part + quadratic_part

## Gradient

In [None]:
def gradient(x):
    """Return the gradient of the second test function at ``x`` as a NumPy array."""
    u, v = x[0], x[1]
    d_du = u**3 - 2*u + 2
    d_dv = 2*(v - 1)
    return np.array([d_du, d_dv])

## Hessian

In [None]:
def hessian(x):
    """Return the 2 x 2 Hessian of the second test function at ``x``.

    The matrix is diagonal: the two variables do not interact.
    """
    h_uu = 3*x[0]**2 - 2
    h_vv = 2
    return np.array([
        [h_uu, 0],
        [0, h_vv],
    ])

# To go further

1. Test the developed functions on the generalized Rosenbrock function.
2. Code the conjugate-gradient algorithm.
3. Code a modified Newton direction algorithm to deal with the case in which the pure Newton direction is not a descent direction.
4. Code the stochastic gradient descent algorithm.
5. Experiment with other test functions (see provided file).