In [1]:
import numpy as np

In [2]:
def f(x):
    x1 = x[0]
    x2 = x[1]
    x3 = x[2]
    
    # local minima = (0, 5, 0)
    # no globam minima
    return x1**2 + (x2-5)**2 + x3**2 + np.sin(x1)**2

In [3]:
def grad_f(x):
    x1 = x[0]
    x2 = x[1]
    x3 = x[2]
    
    return np.array([2*x1 + 2*np.sin(x1)*np.cos(x1), 2*(x2-5), 2*x3])

In [4]:
def g(x):
    x1 = x[0]
    x2 = x[1]
    
    # global minima = (2/3, -5/3)
    return -(5 + 3*x1 - 4*x2 - x1**2 + x1*x2 - x2**2)

In [5]:
def grad_g(x):
    x1 = x[0]
    x2 = x[1]
    
    return np.array([2*x1 - x2 - 3, -x1 + 2*x2 + 4])

In [6]:
def h(x):
    x1 = x[0]
    x2 = x[1]
    
    # global minima = every (x,y) such that x^2+2y^2=4
    return (4 - x1**2 - 2*x2**2)**2

In [7]:
def grad_h(x):
    x1 = x[0]
    x2 = x[1]
    
    return np.array([-4*x1*(-x1**2 - 2*(x2**2) + 4), -8*x2*(-x1**2 -2*(x2**2) + 4)])

In [8]:
functions = np.array([f, g, h])
gradients = np.array([grad_f, grad_g, grad_h])
num_of_args = np.array([3, 2, 2])

In [9]:
def zoom(x, p, phi, phi_grad, alpha_lo, alpha_hi, c1, c2):
    
    while True:
        alpha_j = (alpha_hi + alpha_lo)/2
        
        phi_alpha_j = phi(alpha_j)
        
        if (phi_alpha_j > phi(0) + c1*alpha_j*phi_grad(0)) or (phi_alpha_j >= phi(alpha_lo)):
            alpha_hi = alpha_j
        else:
            phi_grad_alpha_j = phi_grad(alpha_j)
            
            if np.abs(phi_grad_alpha_j) <= -c2*phi_grad(0):
                return alpha_j
            
            if phi_grad_alpha_j*(alpha_hi - alpha_lo) >= 0:
                alpha_hi = alpha_lo
            
            alpha_lo = alpha_j

In [10]:
def line_search_wolfe(fun, grad, x, p, maxiter=100, c1=10**(-3), c2=0.9, alpha_1=1.0, alpha_max=10**6):
    if alpha_1 >= alpha_max:
        raise ValueError('Argument alpha_1 should be less than alpha_max')
    
    def phi(alpha):
        return fun(x + alpha*p)
    
    def phi_grad(alpha):
        return np.dot(grad(x + alpha*p).T, p)
    
    alpha_old = 0
    alpha_new = alpha_1
    
    final_alpha = None
    
    for i in np.arange(1, maxiter+1):
        phi_alpha = phi(alpha_new)
        
        if (i == 1 and phi_alpha > phi(0) + c1*alpha_new*phi_grad(0)) or (i > 1 and phi_alpha >= phi(alpha_old)):
            final_alpha = zoom(x, p, phi, phi_grad, alpha_old, alpha_new, c1, c2)
            break
        
        phi_grad_alpha = phi_grad(alpha_new)
        
        if np.abs(phi_grad_alpha) <= -c2 * phi_grad(0):
            final_alpha = alpha_new
            break
        
        if phi_grad_alpha >= 0:
            final_alpha = zoom(x, p, phi, phi_grad, alpha_new, alpha_old, c1, c2)
            break
            
        alpha_old = alpha_new
        alpha_new = alpha_new + (alpha_max - alpha_new) * np.random.rand(1)
        
    if i == maxiter and final_alpha is None:
        return None

    return final_alpha

In [11]:
def BFGS(fun, grad, x_start, eps, max_iterations=100):
    n = len(x_start)
    B_old = np.diag(np.ones(n))
    x_old = x_start
    
    for i in np.arange(1, max_iterations+1):
        p = -1*np.dot(B_old, grad(x_old))
        
        alpha = line_search_wolfe(fun, grad, x_old, p, maxiter=max_iterations)
        
        if alpha is None:
            print('Wolfe line search did not converge')
            return x_old, i
        
        x_new = x_old + alpha*p
        s = x_new - x_old
        y = grad(x_new) - grad(x_old)
        
        I = np.diag(np.ones(n))
        rho = 1 / np.dot(y.T, s)
        B_new = np.dot(np.dot(I - rho*np.dot(s, y.T), B_old), I - rho*np.dot(y, s.T)) + rho*np.dot(s, s.T)
        
        print('x_k = {0} converges to x_(k+1) = {1}'.format(x_old, x_new))
        
        B_dist = np.linalg.norm(grad(x_old) - grad(x_new))
        if B_dist < eps:
            break
        else:
            print('There is still {0} left for approximations to converge'.format(np.abs(B_dist-eps)), '\n')
        
        x_old = x_new
        B_old = B_new
        
    print('\nFinal approximation of the minima is {0}.'.format(x_new))
    if i != max_iterations:
        print('Optimization process converged in {0} steps'.format(i))
    else:
        print('Optimization process did not converge')
        
    return x_new, i

In [12]:
n = len(functions)

In [13]:
for i in np.arange(n):
    x_start = np.random.randint(-100, 100, num_of_args[i])
    print('\nMinimizing function {0} with the starting point {1}\n'.format(functions[i].__name__, x_start))
    BFGS(functions[i], gradients[i], x_start, 0.001, max_iterations=30)


Minimizing function f with the starting point [-11 -32  54]

x_k = [-11 -32  54] converges to x_(k+1) = [-4.42565465e-03  5.00000000e+00  0.00000000e+00]
There is still 132.75001633269696 left for approximations to converge 

x_k = [-4.42565465e-03  5.00000000e+00  0.00000000e+00] converges to x_(k+1) = [1.10640198e-03 5.00331924e+00 3.31924375e-03]
There is still 0.02303730947284094 left for approximations to converge 

x_k = [1.10640198e-03 5.00331924e+00 3.31924375e-03] converges to x_(k+1) = [-3.07927373e-03  4.99906442e+00 -9.35584501e-04]
There is still 0.01961905047552223 left for approximations to converge 

x_k = [-3.07927373e-03  4.99906442e+00 -9.35584501e-04] converges to x_(k+1) = [-1.10892998e-03  5.00099395e+00  9.93954997e-04]
There is still 0.008586473176069618 left for approximations to converge 

x_k = [-1.10892998e-03  5.00099395e+00  9.93954997e-04] converges to x_(k+1) = [-9.88938667e-04  5.00110140e+00  1.10140016e-03]

Final approximation of the minima is [-9.8