# Homework 3 part 1


In [1]:
from platform import system
from subprocess import check_output
from typing import Callable
from time import time

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from tqdm import tqdm

STUDENT_ID = '00000000'

## Nelder-Mead method

In [2]:
def nelder_mead(
    f: Callable,
    x_0: NDArray,
    radius: float,
    *,
    eps: float = 1e-8,
    steps: float = float('inf'),
    timeout=-1,
    verbose=False,
) -> tuple[float, NDArray, int]:
    """Optimize a function using the Nelder-Mead method.

    Parameters:
        - f: the function to optimize
        - x_0: the initial guess
        - radius: the initial simplex radius
        - eps: the convergence criterion
        - steps: the maximum number of steps
        - timeout: the maximum number of seconds

    Returns:
        - the minimum value of the function
        - the argument that minimizes the function
        - the number of steps taken
    """
    end = time() + timeout
    n = len(x_0)

    # Create initial simplex
    x = np.repeat(x_0[np.newaxis], n + 1, axis=0)
    for i in range(n):
        x[i + 1, i] += radius

    # The first column contains function values
    x = np.c_[[f(x_) for x_ in x], x]
    x = x[np.argsort(x[:, 0])]

    iterations = 0
    if verbose:
        pbar = tqdm()

    while time() < end or steps > 0:
        if verbose:
            pbar.set_description(f'f(x)={x[0, 0]}')
            pbar.update()

        iterations += 1
        steps -= 1

        x_bar = np.mean(x[:-1, 1:], axis=0)
        x_n, y_n = x[-1, 1:], x[-1, 0]
        x_r, x_e = 2 * x_bar - x_n, 3 * x_bar - 2 * x_n
        y_r, y_e = f(x_r), f(x_e)

        if y_r < x[0, 0]:
            x[-1] = (y_e, *x_e) if y_e < y_r else (y_r, *x_r)
        elif y_r < x[-2, 0]:
            x[-1] = y_r, *x_r
        elif y_r < y_n:
            x_oc = x_n + 1.5 * (x_bar - x_n)
            y_oc = f(x_oc)
            if y_oc < y_r:
                x[-1] = y_oc, *x_oc
            else:
                x = shrink(x, f)
        else:
            x_ic = x_n + 0.5 * (x_bar - x_n)
            y_ic = f(x_ic)
            if y_ic < y_n:
                x[-1] = y_ic, *x_ic
            else:
                x = shrink(x, f)

        x = x[np.argsort(x[:, 0])]
        if np.max(x[:, 0]) - np.min(x[:, 0]) < eps:
            if verbose:
                pbar.close()
            return x[0, 0], x[0, 1:], iterations

    if verbose:
        pbar.close()
    return x[0, 0], x[0, 1:], iterations


def shrink(x: NDArray, f: Callable) -> NDArray:
    """Perform a shrink operation on the simplex.

    Parameters:
        - x: the simplex, with the first column containing function values
        - f: the function to optimize

    Returns:
        - the new simplex
    """
    x[1:] = 0.5 * (x[0] + x[1:])
    x[:, 0] = [f(x_) for x_ in x[:, 1:]]
    return x

We compare Nelder-Mead results to methods implemented in the previous homework.

For each function, we select only one of the two initial points used - the one
that gave the best results.

In [12]:
methods = ['GD', 'Polyak', 'Nesterov', 'AdaGrad', 'Newton', 'BFGS']

In [22]:
radii = [0.001, 0.01, 0.02, 0.05, 0.1, 0.5]


def f_a(x_):
    x, y, z = x_
    return (x - z) ** 2 + (2 * y + z) ** 2 + (4 * x - 2 * y + z) ** 2 + x + y


# Data from the previous homework
data = [
    ('N=2', -0.035, -0.037, -0.037, -0.032, -0.198, 30.3),
    ('N=5', -0.073, -0.077, -0.077, -0.056, -0.198, 47.6),
    ('N=10', -0.113, -0.119, -0.119, -0.082, -0.198, -0.198),
    ('N=100', -0.198, -0.198, -0.198, -0.180, -0.198, -0.198),
    ('t=0.1s', -0.198, -0.198, -0.198, -0.198, -0.198, -0.198),
    ('t=1s', -0.198, -0.198, -0.198, -0.198, -0.198, -0.198),
    ('t=2s', -0.198, -0.198, -0.198, -0.198, -0.198, -0.198),
]
previous_results_a = pd.DataFrame(data, columns=['Stopping', *methods])
print('Previous results:')
display(previous_results_a)

data = [['N=2'], ['N=5'], ['N=10'], ['N=100'], ['t=0.1s'], ['t=1s'], ['t=2s']]
for r in radii:
    for i, N in enumerate([2, 5, 10, 100]):
        data[i].append(nelder_mead(f_a, np.zeros(3), r, steps=N)[0])
    for i, t in enumerate([0.1, 1, 2]):
        data[i + 4].append(nelder_mead(f_a, np.zeros(3), r, steps=0, timeout=t)[0])
nelder = [f'NM (r={r:.3f})' for r in radii]
results_a = pd.DataFrame(data, columns=['Stopping', *nelder]).round(3)
print('Nelder-Mead results:')
display(results_a)

Previous results:


Unnamed: 0,Stopping,GD,Polyak,Nesterov,AdaGrad,Newton,BFGS
0,N=2,-0.035,-0.037,-0.037,-0.032,-0.198,30.3
1,N=5,-0.073,-0.077,-0.077,-0.056,-0.198,47.6
2,N=10,-0.113,-0.119,-0.119,-0.082,-0.198,-0.198
3,N=100,-0.198,-0.198,-0.198,-0.18,-0.198,-0.198
4,t=0.1s,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198
5,t=1s,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198
6,t=2s,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198


Nelder-Mead results:


Unnamed: 0,Stopping,NM (r=0.001),NM (r=0.010),NM (r=0.020),NM (r=0.050),NM (r=0.100),NM (r=0.500)
0,N=2,-0.003,-0.023,-0.043,-0.096,-0.075,0.0
1,N=5,-0.01,-0.071,-0.064,-0.114,-0.168,0.0
2,N=10,-0.049,-0.167,-0.196,-0.195,-0.191,-0.172
3,N=100,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198
4,t=0.1s,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198
5,t=1s,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198
6,t=2s,-0.198,-0.198,-0.198,-0.198,-0.198,-0.198


In [26]:
radii = [0.001, 0.01, 0.02, 0.05, 0.1, 0.5]


def f_b(x_):
    x, y, z = x_
    return (x - 1) ** 2 + (y - 1) ** 2 + 100 * (y - x**2) ** 2 + 100 * (z - y**2) ** 2


data = [
    ('N=2', 8.091, 7.926, 7.941, 8.155, 0.035, None),
    ('N=5', 4.814, 4.416, 4.454, 5.941, 0.000, None),
    ('N=10', 2.113, 1.755, 1.787, 4.056, 0.000, None),
    ('N=100', 0.018, 0.018, 0.018, 0.102, 0.000, None),
    ('t=0.1s', 0.0002, 0.0007, 0.0009, 0.0076, 0.0000, 0.0000),
    ('t=1s', 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000),
    ('t=2s', 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000),
]
previous_results_b = pd.DataFrame(data, columns=['Stopping', *methods])
print('Previous results:')
display(previous_results_b)

data = [['N=2'], ['N=5'], ['N=10'], ['N=100'], ['t=0.1s'], ['t=1s'], ['t=2s']]
for r in radii:
    for i, N in enumerate([2, 5, 10, 100]):
        data[i].append(nelder_mead(f_b, np.ones(3) * 1.2, r, steps=N)[0])
    for i, t in enumerate([0.1, 1, 2]):
        data[i + 4].append(nelder_mead(f_b, np.ones(3) * 1.2, r, steps=0, timeout=t)[0])
nelder = [f'NM (r={r:.3f})' for r in radii]
results_b = pd.DataFrame(data, columns=['Stopping', *nelder]).round(3)
print('Nelder-Mead results:')
display(results_b)

Previous results:


Unnamed: 0,Stopping,GD,Polyak,Nesterov,AdaGrad,Newton,BFGS
0,N=2,8.091,7.926,7.941,8.155,0.035,
1,N=5,4.814,4.416,4.454,5.941,0.0,
2,N=10,2.113,1.755,1.787,4.056,0.0,
3,N=100,0.018,0.018,0.018,0.102,0.0,
4,t=0.1s,0.0002,0.0007,0.0009,0.0076,0.0,0.0
5,t=1s,0.0,0.0,0.0,0.0,0.0,0.0
6,t=2s,0.0,0.0,0.0,0.0,0.0,0.0


Nelder-Mead results:


Unnamed: 0,Stopping,NM (r=0.001),NM (r=0.010),NM (r=0.020),NM (r=0.050),NM (r=0.100),NM (r=0.500)
0,N=2,11.209,8.011,5.123,3.619,1.999,11.6
1,N=5,9.884,0.821,0.345,0.062,0.256,1.781
2,N=10,1.717,0.816,0.345,0.051,0.082,0.456
3,N=100,0.0,0.0,0.0,0.0,0.0,0.0
4,t=0.1s,0.0,0.0,0.0,0.0,0.0,0.0
5,t=1s,0.0,0.0,0.0,0.0,0.0,0.0
6,t=2s,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
radii = [0.05, 0.1, 0.5, 1.0, 2.0, 3.0]


def f_c(x_):
    x, y = x_
    return (
        (1.5 - x + x * y) ** 2
        + (2.25 - x + x * y**2) ** 2
        + (2.625 - x + x * y**3) ** 2
    )


data = [
    ('N=2', 6.75, 6.49, 6.56, 13.7, 2.15, None),
    ('N=5', 3.97, 3.66, 3.70, 13.3, 2.49, None),
    ('N=10', 2.04, 1.82, 1.83, 12.8, 1332.2, None),
    ('N=100', 0.08, 0.07, 0.07, 9.3, 9.9, None),
    ('t=0.1s', 0.000, 0.000, 0.000, 0.061, 9.9, None),
    ('t=1s', 0.000, 0.000, 0.000, 0.000, 9.9, None),
    ('t=2s', 0.000, 0.000, 0.000, 0.000, 9.9, None),
]
previous_results_c = pd.DataFrame(data, columns=['Stopping', *methods])
print('Previous results:')
display(previous_results_c)

data = [['N=2'], ['N=5'], ['N=10'], ['N=100'], ['t=0.1s'], ['t=1s'], ['t=2s']]
for r in radii:
    for i, N in enumerate([2, 5, 10, 100]):
        data[i].append(nelder_mead(f_c, np.ones(2), r, steps=N)[0])
    for i, t in enumerate([0.1, 1, 2]):
        data[i + 4].append(nelder_mead(f_c, np.ones(2), r, steps=0, timeout=t)[0])
nelder = [f'NM (r={r:.1f})' for r in radii]
results_c = pd.DataFrame(data, columns=['Stopping', *nelder]).round(3)
print('Nelder-Mead results:')
display(results_c)

Previous results:


Unnamed: 0,Stopping,GD,Polyak,Nesterov,AdaGrad,Newton,BFGS
0,N=2,6.75,6.49,6.56,13.7,2.15,
1,N=5,3.97,3.66,3.7,13.3,2.49,
2,N=10,2.04,1.82,1.83,12.8,1332.2,
3,N=100,0.08,0.07,0.07,9.3,9.9,
4,t=0.1s,0.0,0.0,0.0,0.061,9.9,
5,t=1s,0.0,0.0,0.0,0.0,9.9,
6,t=2s,0.0,0.0,0.0,0.0,9.9,


Nelder-Mead results:


Unnamed: 0,Stopping,NM (r=0.1),NM (r=0.1).1,NM (r=0.5),NM (r=1.0),NM (r=2.0),NM (r=3.0)
0,N=2,11.57,9.243,1.078,0.703,1.078,11.013
1,N=5,1.916,1.463,1.078,0.703,1.078,0.238
2,N=10,1.194,0.616,0.159,0.045,0.01,0.036
3,N=100,0.0,0.0,0.0,0.0,0.0,0.0
4,t=0.1s,0.0,0.0,0.0,0.0,0.0,0.0
5,t=1s,0.0,0.0,0.0,0.0,0.0,0.0
6,t=2s,0.0,0.0,0.0,0.0,0.0,0.0


## Black box optimization

In [32]:
def rmsprop(
    f: Callable, x_0: NDArray, eps: float, lr: float, forgetting: float
) -> tuple[float, NDArray, int]:
    velocity = 0
    deltas = np.eye(3) * 1e-8
    value = f(x_0)
    iterations = 0

    with tqdm(desc=f'f(x)={value}') as pbar:
        while True:
            iterations += 1
            pbar.update()

            grad = np.array([(f(x_0 + e) - value) / 1e-8 for e in deltas])
            velocity = forgetting * velocity + (1 - forgetting) * grad**2
            dx = lr / np.sqrt(velocity + 1e-8) * grad
            x_0 -= dx

            new_value = f(x_0)
            pbar.set_description(f'f(x)={new_value}')

            if np.abs(new_value - value) < eps:
                pbar.close()
                return new_value, x_0, iterations

            value = new_value


def nesterov(
    f: Callable, x_0: NDArray, eps: float, lr: float, momentum: float
) -> tuple[float, NDArray, int]:
    x_1 = x_0
    deltas = np.eye(3) * 1e-8
    value = f(x_0)
    iterations = 0

    with tqdm(desc=f'f(x)={value}') as pbar:
        while True:
            iterations += 1
            pbar.update()

            offset = momentum * (x_1 - x_0)
            grad = np.array([(f(x_1 + offset + e) - value) / 1e-8 for e in deltas])
            dx = lr * grad
            x_0, x_1 = x_1, x_1 - dx + offset

            new_value = f(x_1)
            pbar.set_description(f'f(x)={new_value}')

            if np.abs(new_value - value) < eps:
                pbar.close()
                return new_value, x_1, iterations

            value = new_value


def ada_grad(
    f: Callable, x_0: NDArray, eps: float, lr: float, *args
) -> tuple[float, NDArray, int]:
    gradients_sum = np.zeros_like(x_0) + 1e-8
    deltas = np.eye(3) * 1e-8
    value = f(x_0)
    iterations = 0

    with tqdm(desc=f'f(x)={value}') as pbar:
        while True:
            iterations += 1
            pbar.update()

            grad = np.array([(f(x_0 + e) - value) / 1e-8 for e in deltas])
            gradients_sum = gradients_sum + grad**2
            D_k = np.diag(np.power(gradients_sum, -0.5))
            x_0 -= lr * D_k @ grad

            new_value = f(x_0)
            pbar.set_description(f'f(x)={new_value}')

            if np.abs(new_value - value) < eps:
                pbar.close()
                return new_value, x_0, iterations

            value = new_value


def gradient_descent(
    f: Callable, x_0: NDArray, eps: float, lr: float, *args
) -> tuple[float, NDArray, int]:
    deltas = np.eye(3) * 1e-8
    value = f(x_0)
    iterations = 0

    with tqdm(desc=f'f(x)={value}') as pbar:
        while True:
            iterations += 1
            pbar.update()

            dx = np.array([(f(x_0 + e) - value) / 1e-8 for e in deltas])
            x_0 -= lr * dx
            new_value = f(x_0)
            pbar.set_description(f'f(x)={new_value}')

            if np.abs(new_value - value) < eps:
                pbar.close()
                return new_value, x_0, iterations

            value = new_value


def black_box_minimization(i: int, gd=None) -> tuple[float, NDArray, int]:
    if i not in (1, 2, 3):
        raise ValueError('Invalid function index')

    executable = './hw3_2024_linux'
    if system() == 'Darwin':
        executable = './hw3_2024_mac'
    elif system() == 'Windows':
        executable = './hw3_2024_win.exe'

    def f(x):
        res = check_output([executable, STUDENT_ID, str(i), *x.astype(str)])
        return float(res)

    if gd is None:
        return nelder_mead(f, np.zeros(3), 0.5, eps=1e-11, verbose=True)

    methods_str = ['gd', 'ada', 'nesterov', 'rmsprop']
    gd_method = (gradient_descent, ada_grad, nesterov, rmsprop)[
        methods_str.index(gd[0])
    ]
    return gd_method(f, np.zeros(3), 1e-11, *gd[1:])

### Optimization using Nelder-Mead

In [13]:
minimum_value, minimum, iterations = black_box_minimization(1)
print(f'Minimum f_1({minimum})={minimum_value} found in {iterations} iterations')

f(x)=0.790091360004614: : 81it [10:33,  7.82s/it]

Minimum f_1([0.36789946 0.91367979 0.00913816])=0.790091360004614 found in 81 iterations





In [9]:
minimum_value, minimum, iterations = black_box_minimization(2)
print(f'Minimum f_2({minimum})={minimum_value} found in {iterations} iterations')

f(x)=0.790091360002563: : 103it [11:32,  6.73s/it]

Minimum f_2([0.00913924 0.91367866 0.36790025])=0.790091360002563 found in 103 iterations





In [10]:
minimum_value, minimum, iterations = black_box_minimization(3)
print(f'Minimum f_3({minimum})={minimum_value} found in {iterations} iterations')

f(x)=0.790091360004529: : 71it [09:37,  8.13s/it]

Minimum f_3([0.91367968 0.00913563 0.36789706])=0.790091360003384 found in 71 iterations





### Optimization using gradient descent

In [11]:
minimum_value, minimum, iterations = black_box_minimization(1, gd=('gd', 0.1))
print(f'Minimum f_1({minimum})={minimum_value} found in {iterations} iterations')

f(x)=0.79009136001716: : 54it [09:08, 10.16s/it] 

Minimum f_1([0.36789936 0.91367516 0.00913674])=0.79009136001716 found in 54 iterations





In [None]:
# Was not able to minimize
# minimum_value, minimum, iterations = black_box_minimization(2, gd=('rmsprop', 0.1, 0.8))
# print(f'Minimum f_2({minimum})={minimum_value} found in {iterations} iterations')

In [12]:
minimum_value, minimum, iterations = black_box_minimization(3, gd=('ada', 1))
print(f'Minimum f_3({minimum})={minimum_value} found in {iterations} iterations')

f(x)=0.790091360011904: : 36it [07:36, 12.67s/it]

Minimum f_3([0.91367737 0.00913634 0.36789397])=0.790091360011904 found in 36 iterations



