# Chapter 7: Direct Methods

In [2]:
from copy import copy
from dataclasses import dataclass
import jax
import numpy as np

## Algorithm 7.1

In [3]:
def basis(i,n):
    return [1.0 if k == i else 0.0 for k in range(n)]

## Algorithm 7.2

In [2]:
def cyclic_coordinate_descent(f, x, epsilon):
    delta, n = np.infty, x.shape[0]
    while np.abs(delta) > epsilon:
        x_prime = copy(x)
        for i in range(n):
            d = basis(i,n)
            x = line_search(f,x,d)
        delta = np.linalg.norm(x-x_prime)
    
    return x

### Example

In [25]:
def line_search(f, x, d):
    '''
    Approximate line search with armijo condition
    '''
    alpha = 1.0
    tau = 0.7
    c = 0.1
    gradient = jax.grad(f)

    while f(x  + alpha*np.array(d)) > f(x) + c*alpha*np.matmul( np.array(gradient(x)).reshape(-1,1).T , np.array(d).reshape(-1,1) )[0,0]:
        alpha = tau * alpha
    
    return x  + alpha*np.array(d) 

func = lambda x: (1-x[0])**2 + 5*(4*x[1] - x[0]**2)**2

x = np.array([1.0, 3.0])
epsilon = 0.01
x_sol = cyclic_coordinate_descent(func, x, epsilon)
print(x_sol)

[3.96834752 3.9401    ]


## Algorithm 7.3

In [27]:
def cyclic_coordinate_descent_with_acceleration_step(f, x, epsilon):
    delta, n = np.infty, x.shape[0]
    while np.abs(delta) > epsilon:
        x_prime = copy(x)
        for i in range(n):
            d = basis(i,n)
            x = line_search(f,x,d)
        x = line_search(f, x, x-x_prime)
        delta = np.linalg.norm(x-x_prime)
    
    return x

## Algorithm 7.4

In [32]:
def powell(f, x, epsilon):
    n = x.shape[0]
    U = [basis(i,n) for i in range(n)]
    delta = np.infty
    while delta > epsilon:
        x_prime = copy(x)
        for i in range(n):
            d = U[i]
            x_prime = line_search(f, x_prime, d)
        
        for i in range(n-1):
            U[i] = U[i+1]
        
        U[n-1] = d = x_prime - x
        x_prime = line_search(f, x ,d)
        delta = np.linalg.norm(x-x_prime)
        x = x_prime 
    
    return x

## Algorithm 7.5

In [8]:
def hooke_jeeves(f, x, alpha, epsilon, gamma=0.5):
    y, n = f(x), x.shape[0]
    while alpha > epsilon:
        improved = False
        x_best, y_best = x, y
        for i in range(n):
            for sgn in [-1,1]:
                x_prime = x + sgn*alpha*np.array(basis(i,n))
                y_prime = f(x_prime)
                if y_prime < y_best:
                    x_best, y_best, improved = x_prime, y_prime, True
        x, y = x_best, y_best

        if not improved:
            alpha *= gamma
    return x

## Algorithm 7.6

In [27]:
def generalized_pattern_search(f, x, alpha, D, epsilon, gamma=0.5):
    y, n = f(x), x.shape[0]
    while alpha > epsilon:
        improved= False
        for i, d in enumerate(D):
            x_prime = x + alpha*d
            y_prime = f(x_prime)
            if y_prime < y:
                x, y, improved = x_prime, y_prime, True
                del D[i]
                D.insert(i,d)
                break
        if not improved:
            alpha *= gamma
    return x

### Example

In [30]:
func = lambda x: (1-x[0])**2 + 5*(4*x[1] - x[0]**2)**2

x = np.array([1.0, 3.0])
D = [np.array([0.2,3.3]), np.array([0.1,0.1]), np.array([4.1,1.1])]
epsilon = 0.01
alpha = 0.1
x_sol = generalized_pattern_search(func, x, alpha, D, epsilon)
print(x_sol)

[4.875 5.975]


## Algorithm 7.7

In [7]:
def nelder_mead(f, S, epsilon, alpha=1.0, beta=2.0, gamma=0.5):
    delta, y_arr = np.infty, np.array([f(s) for s in S])
    while delta > epsilon:
        p = np.argsort(y_arr)
        S, y_arr = S[p,:], y_arr[p]
        xl, yl = S[0,:], y_arr[0]
        xh, yh = S[-1,:], y_arr[-1]
        xs, ys = S[-2,:], y_arr[-2]
        xm = np.mean(S[0:-2,:])
        xr = xm + alpha*(xm - xh)
        yr = f(xr)

        if yr < yl:
            xe = xm + beta*(xr-xm)
            ye = f(xe)
            S[-1,:], y_arr[-1] = (xe, ye) if ye < yr else (xr, yr)
        elif yr >= ys:
            if yr < yh:
                xh, yh, S[-1,:], y_arr[-1] = xr, yr, xr, yr
            
            xc = xm + gamma*(xh - xm)
            yc = f(xc)
            if yc > yh:
                for i in range(1,y_arr.shape[0]-1):
                    S[i,:] = (S[i,:]+xl)/2
                    y_arr[i] = f(S[i,:])
            else:
                S[-1,:], y_arr[-1] = xc, yc
            
        else:
            S[-1,:], y_arr[-1] = xr, yr
        
        delta = np.std(y_arr, ddof=1)

    return S[np.argmin(y_arr)]

### Example

In [8]:
f = lambda x: -np.exp(-(x[0]*x[1] - 1.5)**2 -(x[1]-1.5)**2)

S = np.array([[0.25,0.25], [0.75,0.5], [0.75,1.25]])
epsilon = 0.1
x = nelder_mead(f, S, epsilon)
print(x)

[1.21875 1.15625]


## Algorithm 7.8

In [None]:
def direct(f, a, b, epsilon, k_max):
    g = reparameterize_to_unit_hypercube(f, a, b)
    intervals = {}
    n = a.shape[0]
    c = np.empty(n).fill(0.5)
    interval = Interval(c, g(c), np.empty(n).fill(0.0))
    intervals = add_interval(intervals, interval)
    c_best, y_best = copy(interval.c), interval.y

    for k in range(k_max):
        S = get_opt_intervals(intervals, epsilon, y_best)
        to_add = []
        for interval in S:
            to_add.append(divide(g, interval))
            intervals[min_depth(interval)].pop(0)
        
        for interval in to_add:
            intervals = add_interval(intervals, interval)
            if interval.y < y_best:
                c_best, y_best = copy(interval.c), interval.y
    
    return rev_unit_hypercube_parameterization(c_best, a, b)

## Algorithm 7.9

In [None]:
rev_unit_hypercube_parameterization = lambda x, a, b : x*(b-a) + a 

def reparameterize_to_unit_hypercube(f, a, b):
    delta = b-a
    retrun lambda x: f(x*delta + a)

## Algorithm 7.10

In [3]:
@dataclass
class Interval:
   c: np.ndarray
   y: float
   depths: np.ndarray

min_depth = lambda interval: np.min(interval.depths)

def add_interval(intervals, interval):
    d = min_depth(interval)

    if d not in intervals:
        intervals[d] = []
    intervals[d].append((interval.y, interval))
    intervals[d].sort(reverse=True)
    return intervals

In [2]:
f = lambda x,y,z : x*y*z

f(3,2,3)

18