In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation

# Define the function to optimize (2D Rosenbrock function)
def rosenbrock(x, y):
    return (1 - x)**2 + 100 * (y - x**2)**2

# Define the optimization algorithms
optimizers = ['gd', 'momentum', 'nag', 'adagrad', 'rmsprop', 'adam']

# Define the animation update function
def update(frame):
    ax.cla()
    optimizer = optimizers[frame]
    ax.set_title(optimizer.upper() + " Optimization")

    # Perform optimization
    x = np.linspace(-2, 2, 100)
    y = np.linspace(-1, 3, 100)
    X, Y = np.meshgrid(x, y)
    Z = rosenbrock(X, Y)

    # Plot the contour
    ax.contour(X, Y, Z, levels=np.logspace(-1, 3, 10))

    # Initialize parameters
    curr_x, curr_y = -2, 2
    prev_dx, prev_dy = 0, 0

    # Perform optimization iterations
    iterations = 100
    learning_rate = 0.001
    for i in range(iterations):
        # Compute gradients
        dx = 2 * (200 * curr_x**3 - 200 * curr_x * curr_y + curr_x - 1)
        dy = 200 * (curr_y - curr_x**2)

        # Update parameters based on the optimizer
        if optimizer == 'gd':  # Gradient Descent
            curr_x -= learning_rate * dx
            curr_y -= learning_rate * dy
        elif optimizer == 'momentum':  # Momentum
            curr_x, curr_y, prev_dx, prev_dy = (
                curr_x - learning_rate * dx + 0.9 * prev_dx,
                curr_y - learning_rate * dy + 0.9 * prev_dy,
                dx, dy
            )
        elif optimizer == 'nag':  # Nesterov Accelerated Gradient
            lookahead_x, lookahead_y = (
                curr_x - learning_rate * 0.9 * prev_dx,
                curr_y - learning_rate * 0.9 * prev_dy
            )
            lookahead_dx = 2 * (200 * lookahead_x**3 - 200 * lookahead_x * lookahead_y + lookahead_x - 1)
            lookahead_dy = 200 * (lookahead_y - lookahead_x**2)
            curr_x, curr_y, prev_dx, prev_dy = (
                curr_x - learning_rate * dx + 0.9 * lookahead_dx,
                curr_y - learning_rate * dy + 0.9 * lookahead_dy,
                lookahead_dx, lookahead_dy
            )
        elif optimizer == 'adagrad':  # AdaGrad
            cache_x += dx**2
            cache_y += dy**2
            curr_x -= learning_rate * dx / np.sqrt(cache_x + 1e-8)
            curr_y -= learning_rate * dy / np.sqrt(cache_y + 1e-8)
        elif optimizer == 'rmsprop':  # RMSProp
            cache_x = 0.9 * cache_x + 0.1 * dx**2
            cache_y = 0.9 * cache_y + 0.1 * dy**2
            curr_x -= learning_rate * dx / np.sqrt(cache_x + 1e-8)
            curr_y -= learning_rate * dy / np.sqrt(cache_y + 1e-8)
        elif optimizer == 'adam':  # Adam
            m_x = 0.9 * m_x + 0.1 * dx
            m_y = 0.9 * m_y + 0.1 * dy
            v_x = 0.999 * v_x + 0.001 * dx**2
            v_y = 0.999 * v_y + 0.001 * dy**2
            m_x_hat = m_x / (1 - 0.9**(i + 1))
            m_y_hat = m_y / (1 - 0.9**(i + 1))
            v_x_hat = v_x / (1 - 0.999**(i + 1))
            v_y_hat = v_y / (1 - 0.999**(i + 1))
            curr_x -= learning_rate * m_x_hat / (np.sqrt(v_x_hat) + 1e-8)
            curr_y -= learning_rate * m_y_hat / (np.sqrt(v_y_hat) + 1e-8)

        # Plot the current point
        ax.plot(curr_x, curr_y, 'bo')

    return ax

# Create the figure and axis
fig = plt.figure()
ax = plt.axes(xlim=(-2, 2), ylim=(-1, 3))

# Create the animation
anim = animation.FuncAnimation(fig, update, frames=len(optimizers), interval=1000, repeat=True)

# Display the animation
plt.show()

NameError: ignored