In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib tk

In [2]:
# Define the Sphere function
def sphere(x):
    """Evaluate the Sphere function f(x) = sum_i x_i**2.

    :param x: Point to evaluate (NumPy array or any array-like of numbers).
    :return: Sum of the squared entries of ``x``.
    """
    # Convert first so that plain lists/tuples work too, as they already do
    # for the ellipsoid functions in this notebook.
    x = np.asarray(x)
    return np.sum(x**2)

# Gradient of the Sphere function (known)
def grad_sphere(x):
    """Exact gradient of the Sphere function, i.e. ``2 * x``.

    :param x: Point at which to evaluate the gradient (array-like).
    :return: NumPy array with the same shape as ``x``.
    """
    # ``2 * x`` on a plain Python list would repeat the list instead of
    # scaling it, so convert to an array before multiplying.
    return 2 * np.asarray(x)

def noisy_sphere(x):
    """Evaluate the Sphere function with multiplicative log-normal noise.

    The exact value ``sum_i x_i**2`` is multiplied by ``exp(noise)`` where
    ``noise`` is one draw from a normal distribution with mean 0 and standard
    deviation 0.01 (taken from NumPy's global random state).

    :param x: Point to evaluate (array-like).
    :return: Noisy function value.
    """
    # Accept plain lists/tuples as well as arrays
    x = np.asarray(x)
    noise = np.random.normal(loc=0, scale=0.01)
    return np.sum(x**2) * np.exp(noise)

# Noisy gradient of the Sphere function (multiplicative Gaussian noise per coordinate)
def noisy_grad_sphere(x):
    """Gradient of the Sphere function perturbed by multiplicative noise.

    Each coordinate of the exact gradient ``2 * x`` is scaled by
    ``1 + 0.5 * z`` with ``z`` drawn independently from a standard normal
    distribution (NumPy's global random state).

    :param x: Point at which to evaluate the gradient (array-like).
    :return: Noisy gradient as a NumPy array with the same shape as ``x``.
    """
    # Convert up front: ``.shape`` below is only available on arrays
    x = np.asarray(x)
    noise = np.random.randn(*x.shape) * 0.5
    # An additive variant (+ noise * np.linalg.norm(x)) is intentionally not used here
    return 2 * (1 + noise) * x

cond = 1e6  # Default condition number used by the ellipsoid test functions

def ellipsoid(x, cond=cond):
    """Evaluate the ellipsoid function sum_i cond**(i / (n - 1)) * x_i**2.

    :param x: Point to evaluate (array-like of length n).
    :param cond: Condition number, i.e. ratio between the largest and the
        smallest axis weight.
    :return: Function value at ``x``.
    """
    x = np.asarray(x)
    n = len(x)
    # Exponents run from 0 to (almost) 1; the 1e-9 offset avoids 0/0 when n == 1
    weights = cond ** (np.arange(n) / (n - 1 + 1e-9))
    # np.sum (rather than the builtin sum) keeps the reduction vectorized
    return np.sum(weights * x**2)

def grad_ellipsoid(x, cond=cond):
    """Exact gradient of the ellipsoid function: 2 * cond**(i / (n - 1)) * x_i.

    No noise is added here; the noisy counterpart is ``noisy_grad_ellipsoid``.
    (An earlier version added ``randn * ||x||`` to the result, which made this
    "exact" gradient random and inconsistent with ``grad_sphere``.)

    :param x: Point at which to evaluate the gradient (array-like of length n).
    :param cond: Condition number of the ellipsoid.
    :return: Gradient as a NumPy array with the same shape as ``x``.
    """
    x = np.asarray(x)
    n = len(x)
    # Same axis weights as in ellipsoid(); the 1e-9 offset avoids 0/0 when n == 1
    weights = cond ** (np.arange(n) / (n - 1 + 1e-9))
    return 2 * weights * x

def noisy_ellipsoid(x, cond=cond):
    """Evaluate the ellipsoid function with multiplicative log-normal noise.

    The exact value ``sum_i cond**(i / (n - 1)) * x_i**2`` is multiplied by
    ``exp(noise)`` where ``noise`` is one draw from a normal distribution with
    mean 0 and standard deviation 0.01 (NumPy's global random state).

    :param x: Point to evaluate (array-like of length n).
    :param cond: Condition number of the ellipsoid.
    :return: Noisy function value.
    """
    x = np.asarray(x)
    n = len(x)
    noise = np.random.normal(loc=0, scale=0.01)
    # The 1e-9 offset avoids 0/0 when n == 1
    weights = cond ** (np.arange(n) / (n - 1 + 1e-9))
    # np.sum (rather than the builtin sum) keeps the reduction vectorized
    return np.sum(weights * x**2) * np.exp(noise)

def noisy_grad_ellipsoid(x, cond=cond):
    """Gradient of the ellipsoid function perturbed by multiplicative noise.

    Each coordinate of the exact gradient ``2 * cond**(i / (n - 1)) * x_i`` is
    scaled by ``1 + 0.5 * z`` with ``z`` drawn independently from a standard
    normal distribution (NumPy's global random state).

    :param x: Point at which to evaluate the gradient (array-like of length n).
    :param cond: Condition number of the ellipsoid.
    :return: Noisy gradient as a NumPy array with the same shape as ``x``.
    """
    # Convert before touching ``.shape`` so array-likes are accepted throughout
    x = np.asarray(x)
    n = len(x)
    noise = np.random.randn(*x.shape) * 0.5
    # The 1e-9 offset avoids 0/0 when n == 1
    weights = cond ** (np.arange(n) / (n - 1 + 1e-9))
    # An additive variant (+ noise * np.linalg.norm(x)) is intentionally not used here
    return 2 * (1 + noise) * weights * x

In [3]:
def evolution_strategy(f, x0, sigma, iterations=100, mu=5, lambda_=10, verbose=True):
    """
    Minimizes a function with a (mu, lambda) evolution strategy whose
    step-size is adapted by Cumulative LeArning Rate Adaptation (CLARA).

    :param f: Objective function mapping a point to a scalar (lower is better).
    :param x0: Initial solution (array-like).
    :param sigma: Initial step-size.
    :param iterations: Number of generations.
    :param mu: Number of selected offspring; must satisfy 1 <= mu <= lambda_.
    :param lambda_: Number of offspring sampled per generation.
    :param verbose: If True (default), print the current fitness after every
        generation. Note that this costs one extra evaluation of ``f``.
    :return: Tuple ``(candidate_solutions, step_size, path_norm, step_norm)``;
        each element is a list with one entry per generation.
    :raises ValueError: If ``mu`` is not in ``[1, lambda_]``.
    """
    if not 1 <= mu <= lambda_:
        raise ValueError(f'mu must satisfy 1 <= mu <= lambda_, got mu={mu}, lambda_={lambda_}')

    x = np.asarray(x0, dtype=float)
    dim = len(x)

    # Initialize path variable, smoothing and damping factors for CLARA
    path = np.zeros(dim)
    c = 0.2
    d = 1

    # Initialize return variables
    candidate_solutions = []
    step_size = []
    path_norm = []
    step_norm = []

    # The selected offspring are ordered best-first (ascending fitness), so the
    # weights must be decreasing for the best individuals to count the most.
    weights = np.linspace(2, 1, mu)
    weights /= weights.sum()  # Normalize weights to sum to 1

    for i in range(iterations):
        # Generate offspring as standard normal perturbation directions
        pop = np.random.randn(lambda_, dim)

        # Select the mu best directions (lowest objective value first)
        fitness = [f(x + sigma * ind) for ind in pop]
        selected = pop[np.argsort(fitness)[:mu]]

        # Weighted recombination of the selected directions
        step = weights @ selected
        x = x + sigma * step
        candidate_solutions.append(x)

        # Update path (exponentially smoothed sum of steps)
        path = (1 - c) * path + np.sqrt(c * (2 - c)) * step
        path_norm.append(np.linalg.norm(path))
        step_norm.append(np.linalg.norm(step))

        # Update step-size: grow when ||path||^2 exceeds dim, shrink otherwise
        sigma = sigma * np.exp(c / (2 * d) * ((np.linalg.norm(path)**2 / dim) - 1))
        step_size.append(sigma)

        if verbose:
            print(f'Iteration {i}: current fitness = {f(x)}')

    return candidate_solutions, step_size, path_norm, step_norm

In [4]:
def gradient_descent(gradient, x0, lr=0.1, iterations=100, adapt_lr=True):
    """
    Performs gradient descent to minimize a function, optionally adapting the
    learning rate with Cumulative LeArning Rate Adaptation (CLARA).

    :param gradient: Function that computes the gradient ∇f(x).
    :param x0: Initial guess (array-like).
    :param lr: Initial learning rate (step size).
    :param iterations: Number of iterations.
    :param adapt_lr: If True, steps are normalized to unit length and the
        learning rate is adapted from the norm of the cumulated path; if
        False, plain gradient steps with a constant learning rate are taken.
    :return: Tuple ``(candidate_solutions, learning_rate, path_norm,
        gradient_norm)``; each element is a list with one entry per iteration.
    """
    x = np.asarray(x0, dtype=float)  # Initialize x
    dim = len(x)  # Get dimension of search space

    # Initialize path variable, smoothing and damping factors for CLARA
    path = np.zeros(dim)
    c = 0.2
    d = 1

    # Initialize return variables
    candidate_solutions = []
    learning_rate = []
    path_norm = []
    gradient_norm = []

    for _ in range(iterations):
        # Float conversion keeps the division below valid for integer gradients
        grad = np.asarray(gradient(x), dtype=float)
        step = grad
        if adapt_lr:
            grad_length = np.linalg.norm(grad)
            # A zero gradient has no direction: keep the zero step instead of
            # dividing 0 by 0, which would poison x, path and lr with NaN.
            if grad_length > 0:
                step = grad / grad_length
        x = x - lr * step  # Update solution

        # Update path (exponentially smoothed sum of steps)
        path = (1 - c) * path + np.sqrt(c * (2 - c)) * step

        # Update learning rate
        if adapt_lr:
            lr = lr * np.exp(c / (2 * d) * (np.linalg.norm(path)**2 / dim - 1))

        candidate_solutions.append(x)
        learning_rate.append(lr)
        path_norm.append(np.linalg.norm(path))
        gradient_norm.append(np.linalg.norm(grad))

    print('Optimized x: ', x)

    return candidate_solutions, learning_rate, path_norm, gradient_norm

In [5]:
def adam(gradient, x0, lr=0.1, iterations=100, adapt_lr=True):
    """
    Performs Adam to minimize a function, optionally adapting the learning
    rate with Cumulative LeArning Rate Adaptation (CLARA).

    :param gradient: Function that computes the gradient ∇f(x).
    :param x0: Initial guess (array-like).
    :param lr: Initial learning rate (step size).
    :param iterations: Number of iterations.
    :param adapt_lr: If True, the Adam step is normalized to unit length and
        the learning rate is adapted from the norm of the cumulated path; if
        False, standard Adam steps with a constant learning rate are taken.
    :return: Tuple ``(candidate_solutions, learning_rate, path_norm,
        gradient_norm)``; each element is a list with one entry per iteration.
    """
    x = np.asarray(x0, dtype=float)  # Initialize x
    dim = len(x)  # Get dimension of search space

    # Initialize path variable, smoothing and damping factors for CLARA
    path = np.zeros(dim)
    c = 0.2
    d = 1

    # Initialize Adam variables for calculating first and second moments
    m = np.zeros(dim)
    v = np.zeros(dim)
    beta1 = 0.9
    beta2 = 0.999

    # Initialize return variables
    candidate_solutions = []
    learning_rate = []
    path_norm = []
    gradient_norm = []

    for i in range(iterations):
        grad = np.asarray(gradient(x), dtype=float)  # Compute gradient

        # Bias-corrected first and second moment estimates
        m = beta1 * m + (1 - beta1) * grad
        m_hat = m / (1 - beta1**(i + 1))
        v = beta2 * v + (1 - beta2) * grad**2
        v_hat = v / (1 - beta2**(i + 1))
        step_adam = m_hat / (np.sqrt(v_hat) + 1e-8)
        if adapt_lr:
            step_length = np.linalg.norm(step_adam)
            # A zero step has no direction: leave it at zero instead of
            # dividing 0 by 0, which would poison x, path and lr with NaN.
            if step_length > 0:
                step_adam = step_adam / step_length
        # Update solution
        x = x - lr * step_adam

        # Update path of steps taken
        path = (1 - c) * path + np.sqrt(c * (2 - c)) * step_adam

        # Update learning rate
        if adapt_lr:
            lr = lr * np.exp(c / (2 * d) * (np.linalg.norm(path)**2 / dim - 1))

        candidate_solutions.append(x)
        learning_rate.append(lr)
        path_norm.append(np.linalg.norm(path))
        gradient_norm.append(np.linalg.norm(grad))

    print('Optimized x: ', x)

    return candidate_solutions, learning_rate, path_norm, gradient_norm

In [52]:
# --- Experiment configuration ---
dim = 2
x0 = np.ones(dim)
lr0 = 1e6  # Initial learning rate / step-size (very large; presumably to stress the adaptation — confirm)
budget = 1000
adapt_lr = True

# Seed NumPy's global random state so noisy runs are reproducible on re-execution
seed = 0
np.random.seed(seed)

choice = 1  # 0: vanilla gradient descent, 1: Adam, anything else: evolution strategy

# Gradient function handed to the gradient-based optimizers
f = noisy_grad_ellipsoid

if choice == 0:  # Vanilla gradient descent
    optimizer = gradient_descent
    fig_title = 'Gradient descent'  # '(mu, lambda)-ES'
    fig_name = 'gd_'
elif choice == 1:  # Adam
    optimizer = adam
    fig_title = 'Adam'
    fig_name = 'adam_'
else:  # Evolution strategy
    optimizer = evolution_strategy
    f = sphere  # The ES needs function values, not gradients
    fig_title = r'($\mu, \lambda$)-ES'
    fig_name = 'es_'

if adapt_lr:
    fig_title += ' with CLARA'
    fig_name += 'clara_'

# File-name tag and plot label for every known test problem. Deriving both from
# the function actually used keeps title and file name in sync with `f`
# (previously the label was set by hand and went stale, e.g. in the ES branch).
problem_info = {
    sphere: ('sphere_', r'$f_\text{sphere}$'),
    grad_sphere: ('sphere_', r'$f_\text{sphere}$'),
    ellipsoid: ('elli_', r'$f_\text{elli}$'),
    grad_ellipsoid: ('elli_', r'$f_\text{elli}$'),
    noisy_sphere: ('sphere_noisy_', r'$\tilde{f}_\text{sphere}$'),
    noisy_grad_sphere: ('sphere_noisy_', r'$\tilde{f}_\text{sphere}$'),
    noisy_ellipsoid: ('elli_noisy_', r'$\tilde{f}_\text{elli}$'),
    noisy_grad_ellipsoid: ('elli_noisy_', r'$\tilde{f}_\text{elli}$'),
}
# Unknown functions get no file-name tag and a generic label
problem_tag, f_name = problem_info.get(f, ('', r'$f$'))
fig_name += problem_tag

fig_name += 'lr0_' + str(lr0)

# The gradient-based optimizers take the extra adapt_lr switch; the ES does not
if choice == 0 or choice == 1:
    candidate_sol, learning_rates, path_norm, grad_norm = optimizer(f, x0, lr0, iterations=budget, adapt_lr=adapt_lr)
else:
    candidate_sol, learning_rates, path_norm, grad_norm = optimizer(f, x0, lr0, iterations=budget)

Optimized x:  [-6.95553490e-13 -1.85363542e-12]


In [53]:
# Quantities to plot, one subplot each (all lists below are index-aligned)
results = [[np.linalg.norm(x) for x in candidate_sol],  # optimum of the test functions is at the origin
           learning_rates,
           path_norm,
           grad_norm,
           [p**2 / dim for p in path_norm]  # squared path norm divided by the dimension
           ]
fig_titles = ['Distance to optimum',
              'Learning rate',
              'Path norm',
              'Gradient norm',
              'Normalized path norm'
              ]
y_labels = [r'$\|x_t\|$',
            r'$\eta_t$',
            r'$\|p_t\|$',
            r'$\|g_t\|$',
            r'$\|p_t\|^2 / n$'
            ]
colors = ['r',
          'g',
          'b',
          'c',
          'm'
          ]

# One row per plotted quantity, single column
fig, axes = plt.subplots(len(results), 1, figsize=(6, 12))
fig.suptitle(rf'{fig_title} on {f_name}, $n$ = {str(len(x0))}, $\eta_0$ = {lr0:.0e}')

for ax, series, title, y_label, color in zip(axes.flat, results, fig_titles, y_labels, colors):
    ax.semilogy(series, color=color)  # logarithmic y-axis
    ax.set_title(title)
    ax.set_xlabel('Iterations')
    ax.set_ylabel(y_label)
    ax.grid(True)

# Adjust layout, save to PDF and show
plt.tight_layout()
plt.savefig(fig_name + '.pdf')
plt.show()