# Chapter 4: Local Descent

In [1]:
import numpy as np
import jax
import cvxpy as cp

## Algorithm 4.1

In [2]:
def bracket_minimum(f, x=0, s=1e-2, k=2.0):
    a, ya = x, f(x)
    b, yb = a + s, f(a+s)
    
    if yb > ya:
        a, b = b, a
        ya, yb = yb, ya
        s = -s
    
    while True:
        c, yc = b + s, f(b+s)
        if yc > yb:
            return (a,c) if a < c else (c, a)
        a, ya, b, yb = b, yb, c, yc

def minimize(f_deriv, a, b, epsilon):
    '''
    Bisection algorithm for univariate optmization
    '''
    if a > b:
        a, b = b, a
    
    ya, yb = f_deriv(a), f_deriv(b)

    if ya == 0:
        b = a
    if yb == 0:
        a = b

    while b - a > epsilon:
        x = (a+b)/2
        y = f_deriv(x)
        if y == 0:
            a, b = x, x
        elif y*ya> 0:
            a = x
        else:
            b = x
    
    return (a+b)/2

def line_search(f, x, d):
    '''
    One step of Exact Line Search
    '''
    objective = lambda alpha: f(x+alpha*d)

    a, b = bracket_minimum(objective)

    alpha = minimize(objective, a, b, 0.0001)

    return x + alpha*d

### Example

In [3]:
def func(x):
    return np.sin(x[0]*x[1])+np.exp(x[1]+x[2])-x[2]

x = np.array([1.0, 2.0, 3.0])
d = np.array([0.0, -1.0, -1.0])

sol = line_search(func, x, d)
print(sol)

[ 1.         -1.13996094 -0.13996094]


## Algorithm 4.2

In [4]:
def backtracking_line_search(f, gradient, x, d, alpha, p=0.5, beta=1e-4):
    y, g = f(x), gradient
    while f(x+alpha*d) > y + beta*alpha*(np.dot(g,d)):
        alpha *= p
    return alpha

### Example

In [5]:
def aproximate_line_search(f, x, alpha):
    '''
    One Step of Aproximate Line Search with Backtracking
    '''
    gradient = jax.grad(f)
    d = -gradient(x)

    alpha = backtracking_line_search(f, gradient, x, d, alpha)

    return x + alpha*d

def func(x):
    return jax.numpy.sin(x[0]*x[1])+jax.numpy.exp(x[1]+x[2])-x[2]

x = np.array([1.0, 2.0, 3.0])
alpha = 1.0
sol = line_search(func, x, alpha)
print(sol)

[-1.37003906 -0.37003906  0.62996094]


## Algorithm 4.3

In [6]:
def strong_backtracking(f, gradient, x, d, alpha=1, beta=1e-4, sigma=0.1):
    y0, g0, y_prev, alpha_prev = f(x), np.dot(gradient(x),d), np.nan, 0
    alpha_lo, alpha_hi = np.nan, np.nan

    while True:
        y = f(x+alpha*d)
        if y > y0+beta*alpha*g0 or (not np.isnan(y_prev) and y >= y_prev):
            alpha_lo, alpha_hi = alpha_prev, alpha
            break
        g = np.dot(gradient(x+alpha*d),d)

        if np.abs(g) <= -sigma*g0:
            return alpha
        elif g >= 0:
            alpha_lo, alpha_hi = alpha, alpha_prev
            break
        y_prev, alpha_prev, alpha = y, alpha, 2*alpha

    ylo = f(x+alpha_lo*d)

    while True:
        alpha = (alpha_lo + alpha_hi)/2
        y = f(x+alpha*d)
        if y > y0 + beta*alpha*g0 or y >= ylo:
            alpha_hi = alpha
        else:
            g = np.dot(gradient(x+alpha*d),d)
            if np.abs(g) <= -sigma*g0:
                return alpha
            elif g*(alpha_hi - alpha_lo) >= 0:
                alpha_hi = alpha_lo

            alpha_lo = alpha

### Example

In [7]:
def aproximate_line_search(f, x, alpha):
    '''
    One Step of Aproximate Line Search with Strong Backtracking
    '''
    gradient = jax.grad(f)
    d = -gradient(x)

    alpha = strong_backtracking(f, gradient, x, d, alpha)

    return x + alpha*d

def func(x):
    return jax.numpy.sin(x[0]*x[1])+jax.numpy.exp(x[1]+x[2])-x[2]

x = np.array([1.0, 2.0, 3.0])
alpha = 1.0
sol = line_search(func, x, alpha)
print(sol)

[-1.37003906 -0.37003906  0.62996094]


## Algorithm 4.4

In [11]:
def trust_region_descent(f, gradient, H, x, k_max, eta_1 = 0.25, eta_2 = 0.5, gamma_1 = 0.5, gamma_2 = 2.0, delta= 1.0):

    y = f(x)

    for k in range(k_max):
        x_prime, y_prime = solve_trust_region_subproblem(gradient, H, x, delta)
        r = (y- f(x_prime))/(y - y_prime)
        if r < eta_1:
            delta *= gamma_1
        else:
            x, y = x_prime, y_prime
            if r > eta_2:
                delta *= gamma_2

    return x

def solve_trust_region_subproblem(gradient, H, x0, delta):

    n            = len(x0)

    #Variables to optimize
    x_sub        = cp.Variable(n)
    
    #Objective functon is second order Taylor approximation
    objetive_function = cp.Minimize( cp.matmul(np.array(gradient(x0)),(x_sub-x0))+\
                                cp.quad_form(x_sub- x0, np.array(H(x0)))/2.0)

    #Constraint such that solutions of subproblem not far from actual solutuon of
    # master problem
    constraints = [cp.pnorm(x_sub-x0,p=1) <= delta]

    prob   = cp.Problem(objetive_function, constraints)

    result = prob.solve()

    return x_sub.value, prob.value

### Example

In [12]:
def fun(x):
    return 100*x[0]**4 + 0.01*x[1]**4
    
gradient = jax.grad(fun)
H = jax.hessian(fun)

x_0     = [1.0,1.0]
eta_1   = 0.25
eta_2   = 0.5
gamma_1 = 0.5
gamma_2 = 2.0
delta   = 1.0
k_max = 11

sol = trust_region_descent(fun, gradient, H, x, k_max, eta_1, eta_2, gamma_1, gamma_2, delta)
print(sol)

[0.01156102 0.02338696 0.42379419]
