# Constrained optimization: equality constraints
### By Qijun Jin and Johnny

### Sequential Quadratic Programming

In [39]:
import numpy as np

In [40]:
def f(x):
    return np.array([np.power(np.e, 3 * x[0]) + np.power(np.e, -4 * x[1])], dtype=np.float64)


def h(x):
    return np.array([np.power(x[0], 2) + np.power(x[1], 2) - 1], dtype=np.float64)

In [41]:
def df(x):
    d1 = 3 * np.power(np.e, 3 * x[0])
    d2 = -4 * np.power(np.e, -4 * x[1])
    return np.array([d1, d2])


def dh(x):
    d1 = 2 * x[0]
    d2 = 2 * x[1]
    return np.array([d1, d2])


def dl(x, lamb):
    return df(x) - lamb * dh(x)

In [42]:
def d2f(x):
    d11 = 9 * np.power(np.e, 3 * x[0])
    d12 = 0
    d21 = 0
    d22 = 16 * np.power(np.e, -4 * x[1])
    return np.array([[d11, d12], [d21, d22]])


def d2h(x):
    d11 = 2
    d12 = 0
    d21 = 0
    d22 = 2
    return np.array([[d11, d12], [d21, d22]])


def d2l(x):
    return d2f(x) - lamb * d2h(x)

In [43]:
def d2la(x, lamb):
    G = d2f(x) - lamb * d2h(x)
    C = -dh(x)

    matrix = np.zeros((3, 3))
    matrix[:2, :2] = G
    matrix[-1, :2] = C
    matrix[:2, -1] = C
    
    return matrix

###### 1. The stopping criterion we have estalished is to check that the norm of $\nabla xL$ is major than epsilon.

In [None]:
def solve(x, lamb, max_iter=1000, eps = 10e-16):
    i = 0

    dlx = dl(x, lamb)

    while i < max_iter and np.linalg.norm(dlx) > eps:
        A = d2la(x, lamb)
        dlx = dl(x, lamb)
        b = np.concatenate((-dlx, h(x)))
        d = np.linalg.solve(A, b)

        x += d[:-1]
        lamb += d[-1]
        i += 1

    return x, lamb

In [45]:
x = np.array([-1., 1.])

lamb = -1

print(solve(x, lamb))

(array([-0.74833549,  0.66332043]), -0.21232493554997134)
(array([-0.74833549,  0.66332043]), -0.2123249355499713)


###### 1. We can can see that the algorithm with initial point closed to the minimum can be converged.

In [46]:
def merit(x, p=10):
    return f(x) + p * np.power(h(x), 2)

def dmerit(x, p=10):
    d1 = df(x)
    d2 = 2 * p * dh(x)
    return d1 + d2

###### 2. Setting x = (100, 100).

In [47]:
x = np.array([100., 100.])

lamb = -1

print(solve(x, lamb))

(array([nan, nan]), nan)


  """
  This is separate from the ipykernel package so we can avoid doing imports until


###### 2. We can see that with initial point far away from minimum pont cannot be solved by Newton-like iterarion.

###### 3. Implement gradient descent method with Merit function.

In [None]:
def gradient_descend_2d(dmerit, x, alpha=1, eps=1e-6, max_iter=1000000):
    i = 0

    while i < max_iter:
        dm = dmerit(x)
        dm_norm = np.linalg.norm(dm)
        dm_normalized = dm / dm_norm

        x -= alpha * dm_normalized
        i += 1

    return x

###### In order to find the minimum, we have used the gradient descent method. In this gradient descent methods, we have penalized the constraint function quadratically when the initial point is far away from the minimum.

###### 4. Gradient descent and Newton-like iteration with Merit function.

In [None]:
x = np.array([100., 100.])

minimum = gradient_descend_2d(dmerit, x, max_iter=100)

print(minimum)

lamb = -1

print(solve(minimum, lamb))

[ 1.55672317 97.89137348]
(array([-0.74833549,  0.66332043]), -0.2123249355499713)


###### As we can see that if we applied first the gradient descent method with the minimizers of the constrained problem. We can approach to a point which is 'optimal' for the Newton-like iteration to find the minimum. This combined method can help us to find the minimum even if the initial point is far away from this minimum. If the initial point is extremely far away from minimum, the result can be diverged to $inf$ because the evaluation of the function with exponential can not be saved in np.float64.