In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim.lr_scheduler as lr_scheduler
%matplotlib tk

In [2]:
# Define the Sphere function
def sphere(x):
    return np.sum(x ** 2)

# Gradient of the Sphere function (known)
def sphere_gradient(x):
    # noise = np.random.randn(*x.shape) * 0.1
    return 2 * x  #  + noise

cond = 1e6
def ellipsoid(x, cond=cond):
    return sum(cond**(np.arange(len(x)) / (len(x) - 1 + 1e-9)) * np.asarray(x)**2)

def ellipsoid_gradient(x, cond=cond):
    return 2 * cond**(np.arange(len(x)) / (len(x) - 1 + 1e-9)) * np.asarray(x)

In [3]:
def evolution_strategy(f, x, sigma, budget, mu=5, lambda_=10):
    dim = len(x0)

    # Initialize path variable, decay and damping factors for Cumulative LeArning Rate Adaptation (CLARA)
    path = np.zeros(dim)
    c = 0.2
    d = 0.2
    cos_theta = 0

    # Initialize return variables
    candidate_solutions = []
    step_size = []
    path_norm = []
    step_norm = []
    cos_thetas = []

    weights = np.linspace(1, 2, mu)  # Assign higher weight to best individuals
    weights /= weights.sum()  # Normalize weights to sum to 1

    for i in range(budget):
        # Generate offspring
        pop = np.random.randn(lambda_, dim)

        # Select mu best individuals
        selected = pop[np.argsort([f(x + sigma * ind) for ind in pop])[:mu]]

        # Update current solution
        step = np.sum(selected.T * weights, axis=1)
        x = x + sigma * step
        candidate_solutions.append(x)

        # Update mean estimate of cosine of the angle change between current and mean gradient
        cos_theta = (1 - c) * cos_theta + np.sqrt(c * (2 - c)) * np.dot(step, path) / (np.linalg.norm(path) * np.linalg.norm(step) + 1e-8)
        cos_thetas.append(cos_theta)

        # Update path
        path = (1 - c) * path + np.sqrt(c * (2 - c)) * step
        path_norm.append(np.linalg.norm(path)**2)
        step_norm.append(np.linalg.norm(step)**2)

        # Update step-size
        sigma = sigma * np.exp(d * ((np.linalg.norm(path)**2 / dim) - 1))
        step_size.append(sigma)


        print(f'Iteration {i}: current fitness = {f(x)}')

    return candidate_solutions, step_size, path_norm, step_norm, cos_thetas

In [4]:
def gradient_descent(gradient, x0, lr=0.1, iterations=100):
    """
    Performs gradient descent to minimize a function.

    :param gradient: Function that computes the gradient ∇f(x).
    :param x0: Initial guess (NumPy array).
    :param lr: Learning rate (step size).
    :param iterations: Number of iterations.
    :return: Final optimized value of x.
    """
    x = x0  # Initialize x
    dim = len(x0)  # Get dimension of search space

    # Initialize path variable, decay and damping factors for Cumulative LeArning Rate Adaptation (CLARA)
    path = np.zeros(dim)
    cos_theta = 0
    c = 0.2
    d = 0.2

    # Estimate of average of cos(angle) between 2 Gaussian vectors
    # est_angle = average_angle_gaussian_vectors(dim)

    # Initialize return variables
    candidate_solutions = []
    learning_rate = []
    path_norm = []
    gradient_norm = []
    cos_thetas = []

    for i in range(iterations):
        grad = gradient(x)  # Compute gradient
        x = x - lr * grad  # Update step

        # Update mean estimate of cosine of the angle change between current and mean gradient
        cos_theta = (1 - c) * cos_theta + np.sqrt(c * (2 - c)) * np.dot(grad, path) / (np.linalg.norm(path) * np.linalg.norm(grad) + 1e-8)

        # Update path
        path = (1 - c) * path + np.sqrt(c * (2 - c)) * grad

        # Update learning rate
        # lr = lr * np.exp(d * ((np.linalg.norm(path)**2 / dim - 1))
        # lr = lr * np.exp(d * (theta))

        candidate_solutions.append(x)
        learning_rate.append(lr)
        path_norm.append(np.linalg.norm(path)**2)
        gradient_norm.append(np.linalg.norm(grad)**2)
        cos_thetas.append(cos_theta)

    print('Optimized x: ', x)

    return candidate_solutions, learning_rate, path_norm, gradient_norm, cos_thetas

In [5]:
x0 = np.ones(2)
lr0 = 1e-1  # 1e-3
budget = 100

In [8]:
# candidate_sol, learning_rates, path_norm, grad_norm, cos_theta = gradient_descent(sphere_gradient, x0, lr=lr0, iterations=budget)
candidate_sol, learning_rates, path_norm, grad_norm, cos_theta = evolution_strategy(sphere, x0, sigma=lr0, budget=budget)

results = [[np.linalg.norm(x)**2 for x in candidate_sol],
           learning_rates,
           path_norm,
           grad_norm,
           [path_norm[i] / grad_norm[i] for i in range(len(path_norm))],
           cos_theta
           ]
fig_titles = ['Distance to optimum',
              'Learning rate',
              'Path norm',
              'Gradient/step norm',
              'Path-gradient/step ratio',
              'Angle between path and current grad./step'
              ]
y_labels = [r'$\|x_t\|^2$',
            r'$\alpha_t$',
            r'$\|p_t\|^2$',
            r'$\|g_t\|^2$',
            r'$\|p_t\|^2 / \|g_t\|^2$',
            r'$\cos(\theta)$'
            ]
colors = ['r',
          'g',
          'b',
          'c',
          'm',
          'y'
          ]

fig, axes = plt.subplots(3, 2, figsize=(12, 12))  # 2 rows, 2 columns
fig.suptitle(f'Vanilla gradient descent on the sphere, D = {str(len(x0))}')

for i, ax in enumerate(axes.flat):  # Iterate over subplots
    if i == len(results) - 1:
        ax.plot(results[i], color=colors[i])
    else:
        ax.semilogy(results[i], color=colors[i])
    ax.set_title(fig_titles[i])
    ax.set_xlabel('Iterations')
    ax.set_ylabel(y_labels[i])
    ax.grid(True)

# Adjust layout and show
plt.tight_layout()
plt.show()

Iteration 0: current fitness = 1.7527359703148113
Iteration 1: current fitness = 1.5832250172927407
Iteration 2: current fitness = 1.5679570368380213
Iteration 3: current fitness = 1.4864114110958275
Iteration 4: current fitness = 1.331195546596184
Iteration 5: current fitness = 1.2053961653022727
Iteration 6: current fitness = 1.1356091510307653
Iteration 7: current fitness = 1.0340534466198879
Iteration 8: current fitness = 0.903377887519375
Iteration 9: current fitness = 0.8373945746406377
Iteration 10: current fitness = 0.8132385618512055
Iteration 11: current fitness = 0.7067451753022274
Iteration 12: current fitness = 0.6316343358325999
Iteration 13: current fitness = 0.4742678914369941
Iteration 14: current fitness = 0.4049669681690513
Iteration 15: current fitness = 0.27868800572670493
Iteration 16: current fitness = 0.17583499980475312
Iteration 17: current fitness = 0.14900701480045228
Iteration 18: current fitness = 0.05984505552862266
Iteration 19: current fitness = 0.01311