In [1]:
import numpy as np
import pyensmallen

## Rosenbrock's banana

$$
f(x, y) = (a - x)^2 + b(y - x^2)^2
$$

The global minimum $f = 0$ is reached at $(x, y) = (a, a^2)$. The code below uses $a = 2$ and $b = 100$, so the optimum is $(2, 4)$.

In [2]:
def rosenbrock(x, a=2, b=100.0):
    """Evaluate the (chained) Rosenbrock function at ``x``.

    Computes ``sum_i (a - x[i])**2 + b * (x[i+1] - x[i]**2)**2`` over all
    consecutive coordinate pairs of ``x``. For two coordinates this is the
    classic "banana" function ``(a - x)**2 + b * (y - x**2)**2``, whose
    minimum value 0 is attained at ``(a, a**2)``.

    Parameters
    ----------
    x : array_like
        1-D point at which to evaluate. Lists and tuples are converted to a
        NumPy array.
    a : float, default 2
        Offset of the ``(a - x)**2`` term.
    b : float, default 100.0
        Weight of the ``(y - x**2)**2`` term (previously hard-coded as 100).

    Returns
    -------
    numpy scalar
        The objective value; 0 for inputs with fewer than two coordinates
        (empty sum).
    """
    x = np.asarray(x)
    # Pair every coordinate with its successor.
    head, tail = x[:-1], x[1:]
    return np.sum((a - head) ** 2 + b * (tail - head ** 2) ** 2)


def rosenbrock_gradient(x, a=2, b=100.0):
    """Analytic gradient of the (chained) Rosenbrock function.

    Differentiates ``sum_i (a - x[i])**2 + b * (x[i+1] - x[i]**2)**2`` with
    respect to every coordinate of ``x``.

    Parameters
    ----------
    x : array_like
        1-D point at which to evaluate the gradient.
    a : float, default 2
        Offset of the ``(a - x)**2`` term.
    b : float, default 100.0
        Weight of the ``(y - x**2)**2`` term (previously hard-coded through
        the coefficients 200 and 400).

    Returns
    -------
    numpy.ndarray
        Gradient with the same shape as ``x``. The dtype is always
        floating-point (or complex if ``x`` is), never integer.
    """
    x = np.asarray(x)

    # An integer ``x`` would make ``zeros_like`` allocate an integer buffer and
    # silently truncate fractional gradient entries on assignment.
    out_dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else float
    grad = np.zeros_like(x, dtype=out_dtype)

    # With fewer than two coordinates the objective is an empty sum (constant
    # 0), so the gradient is zero; this also avoids indexing x[1] below.
    if x.size < 2:
        return grad

    # First coordinate: only appears as the "x" of the first pair.
    grad[0] = -2 * (a - x[0]) - 4 * b * x[0] * (x[1] - x[0] ** 2)

    # Interior coordinates: each is the "y" of the previous pair and the "x"
    # of the next one.
    grad[1:-1] = (
        -2 * (a - x[1:-1])
        + 2 * b * (x[1:-1] - x[:-2] ** 2)
        - 4 * b * x[1:-1] * (x[2:] - x[1:-1] ** 2)
    )

    # Last coordinate: only appears as the "y" of the final pair.
    grad[-1] = 2 * b * (x[-1] - x[-2] ** 2)

    return grad


def objective_function(x, grad):
    """Objective callback: fill ``grad`` in place and return the value at ``x``.

    The gradient of the Rosenbrock function (default parameters) is written
    into the caller-supplied ``grad`` buffer; the function value is returned.
    """
    gradient_at_x = rosenbrock_gradient(x)
    grad[:] = gradient_at_x  # overwrite the caller's buffer, do not rebind

    value_at_x = rosenbrock(x)
    return value_at_x

## L-BFGS

In [3]:
# Initialize L-BFGS optimizer
# NOTE(review): numBasis is presumably the number of stored past updates used
# for the limited-memory Hessian approximation — confirm against the
# ensmallen docs. maxIterations caps the iteration count.
lbfgs = pyensmallen.L_BFGS(numBasis=10, maxIterations=1000)

# Initial guess: a 2-D starting point away from the optimum (a, a**2) = (2, 4)
initial_x = np.array([-1.2, 1.0])

# Optimize. objective_function writes the gradient into its second argument
# and returns the objective value.
result = lbfgs.optimize(objective_function, initial_x)

# With the default a=2 the objective is 0 at (2, 4), so the printed value
# should be close to zero.
print("Optimized parameters:", result)
print("Objective value:", rosenbrock(result))

Optimized parameters: [2. 4.]
Objective value: 2.7026507009290854e-19


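L-BFGS reaches the global optimum $(a, a^2) = (2, 4)$ to within floating-point precision (objective value $\approx 10^{-19}$).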

## Adam and variants

In [4]:
# Shared random starting point for all Adam-family optimizers below.
# A fixed seed makes "Restart Kernel -> Run All" reproduce the same start (and
# therefore comparable results); the unseeded global RNG did not.
initial_w = np.random.default_rng(seed=42).standard_normal(2)

In [5]:
# Adam with a budget of 100,000 iterations, started from the shared random
# point initial_w. Compare the result with the optimum (a, a**2) = (2, 4).
# NOTE(review): assumes optimize() leaves initial_w unmodified so that every
# optimizer in this section starts from the same point — confirm.
adam = pyensmallen.Adam(maxIterations=100_000)
result = adam.optimize(objective_function, initial_w)
result

array([1.84242719, 3.39499252])

In [6]:
# AdaMax with the same 100,000-iteration budget and the same starting point
# initial_w; the optimum to compare against is (2, 4).
# NOTE(review): assumes an earlier optimize() call did not modify initial_w
# in place — confirm.
adamax = pyensmallen.AdaMax(maxIterations=100_000)
result = adamax.optimize(objective_function, initial_w)
result

array([1.9529393 , 3.81386375])

In [7]:
# OptimisticAdam with the same 100,000-iteration budget and starting point
# initial_w; the optimum to compare against is (2, 4).
# NOTE(review): `a` here is the optimizer handle, not the Rosenbrock
# parameter `a`; a more descriptive name would avoid confusion.
a = pyensmallen.OptimisticAdam(maxIterations=100_000)
result = a.optimize(objective_function, initial_w)
result

array([1.5916263 , 2.53414253])

In [8]:
# AMSGrad with the same 100,000-iteration budget and starting point
# initial_w; the optimum to compare against is (2, 4).
# NOTE(review): `a` is rebound to a new optimizer here (it is not the
# Rosenbrock parameter `a`); a more descriptive name would avoid confusion.
a = pyensmallen.AMSGrad(maxIterations=100_000)
result = a.optimize(objective_function, initial_w)
result

array([0.65721087, 0.42299591])

In [9]:
# Nadam with the same 100,000-iteration budget and starting point
# initial_w; the optimum to compare against is (2, 4).
# NOTE(review): `a` is rebound to a new optimizer here (it is not the
# Rosenbrock parameter `a`); a more descriptive name would avoid confusion.
a = pyensmallen.Nadam(maxIterations=100_000)
result = a.optimize(objective_function, initial_w)
result

array([1.90080892, 3.61223393])