In [2]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.animation import FuncAnimation

def animate_optimizers(initial_params = np.array([-3.0, 0.0001]), learning_rate=0.1, num_iterations=20):
    """
    Build an animated 3D plot of SGD, RMSProp, and Adam on the saddle f(x, y) = x**2 - y**2.

    Each optimizer starts from the same point and is run for ``num_iterations``
    steps with the same learning rate. The visited points are drawn on top of
    the loss surface, one additional step per animation frame.

    Args:
        initial_params: Two values ``(x, y)`` giving the shared starting point.
            Any array-like is accepted; it is converted to a float array and
            never modified in place.
        learning_rate: Step size passed to all three optimizers.
        num_iterations: Number of update steps performed by each optimizer.

    Returns:
        matplotlib.animation.FuncAnimation: the animation object. The figure it
        draws on is closed before returning.

    Raises:
        ValueError: If ``initial_params`` does not contain exactly two values.
    """
    start = np.asarray(initial_params, dtype=float)
    if start.shape != (2,):
        raise ValueError("initial_params must contain exactly two values (x, y)")

    # --- Objective and its gradient -------------------------------------
    def saddle_function(params):
        """Return x**2 - y**2 for ``params = (x, y)`` (scalars or arrays)."""
        x, y = params
        return x**2 - y**2

    def gradient(params):
        """Return the gradient (2x, -2y) of the saddle function."""
        x, y = params
        return np.array([2 * x, -2 * y])

    # --- Optimizer update rules -----------------------------------------
    def sgd(params, grads, lr):
        """Plain gradient step."""
        return params - lr * grads

    def rmsprop(params, grads, cache, lr=0.1, beta=0.9, epsilon=1e-8):
        """RMSProp step; returns the new parameters and the updated squared-gradient average."""
        cache = beta * cache + (1 - beta) * (grads ** 2)
        params = params - lr * grads / (np.sqrt(cache) + epsilon)
        return params, cache

    def adam(params, grads, m, v, t, lr=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """Adam step with bias correction; ``t`` is the 1-based step index."""
        m = beta1 * m + (1 - beta1) * grads
        v = beta2 * v + (1 - beta2) * (grads ** 2)
        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)
        params = params - lr * m_hat / (np.sqrt(v_hat) + epsilon)
        return params, m, v

    # --- Run the three optimizers and record every visited point --------
    params_sgd = start.copy()
    params_rmsprop = start.copy()
    params_adam = start.copy()

    cache_rmsprop = np.zeros_like(start)
    m_adam = np.zeros_like(start)
    v_adam = np.zeros_like(start)

    # Each path begins with the starting point, so it ends up holding
    # num_iterations + 1 entries.
    path_sgd = [params_sgd.copy()]
    path_rmsprop = [params_rmsprop.copy()]
    path_adam = [params_adam.copy()]

    # t starts at 1 because Adam's bias correction divides by (1 - beta ** t).
    for t in range(1, num_iterations + 1):
        params_sgd = sgd(params_sgd, gradient(params_sgd), learning_rate)
        path_sgd.append(params_sgd.copy())

        params_rmsprop, cache_rmsprop = rmsprop(
            params_rmsprop, gradient(params_rmsprop), cache_rmsprop, lr=learning_rate
        )
        path_rmsprop.append(params_rmsprop.copy())

        params_adam, m_adam, v_adam = adam(
            params_adam, gradient(params_adam), m_adam, v_adam, t, lr=learning_rate
        )
        path_adam.append(params_adam.copy())

    path_sgd = np.array(path_sgd)
    path_rmsprop = np.array(path_rmsprop)
    path_adam = np.array(path_adam)

    # Transposing turns the (steps, 2) path into (x_values, y_values).
    loss_sgd = saddle_function(path_sgd.T)
    loss_rmsprop = saddle_function(path_rmsprop.T)
    loss_adam = saddle_function(path_adam.T)

    # --- Loss surface ----------------------------------------------------
    x = np.linspace(-3, 3, 400)
    y = np.linspace(-3, 3, 400)
    X, Y = np.meshgrid(x, y)
    Z = saddle_function([X, Y])

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z, cmap=cm.viridis, alpha=0.6, edgecolor='none')

    # Empty line artists; their data is filled in frame by frame.
    line_sgd, = ax.plot([], [], [], marker='o', color='red', label='SGD')
    line_rmsprop, = ax.plot([], [], [], marker='^', color='orange', label='RMSProp')
    line_adam, = ax.plot([], [], [], marker='x', color='blue', label='Adam')

    # Mark the starting point and the saddle point at the origin.
    ax.scatter(start[0], start[1], saddle_function(start),
               color='green', s=100, label='Start')
    ax.scatter(0, 0, saddle_function([0, 0]),
               color='black', s=100, label='Saddle Point')

    ax.set_xlabel('X Parameter')
    ax.set_ylabel('Y Parameter')
    ax.set_zlabel('Loss')
    ax.set_title('Optimization Paths on Saddle Function: SGD vs. RMSProp vs. Adam')
    ax.legend()

    tracks = (
        (line_sgd, path_sgd, loss_sgd),
        (line_rmsprop, path_rmsprop, loss_rmsprop),
        (line_adam, path_adam, loss_adam),
    )

    def update(frame):
        """Show the first ``frame + 1`` points of every path and return the line artists."""
        # frame is 0-based: frame 0 shows only the start, the last frame shows
        # the complete path including the final optimizer step.
        shown = frame + 1
        for line, path, loss in tracks:
            line.set_data(path[:shown, 0], path[:shown, 1])
            line.set_3d_properties(loss[:shown])
        return [line_sgd, line_rmsprop, line_adam]

    # One frame per recorded point (start + num_iterations steps), so the
    # animation ends on the last step instead of stopping short of it.
    anim = FuncAnimation(fig, update, frames=len(path_sgd),
                         interval=500, blit=False, repeat=True)

    # Close the figure before returning; presumably this keeps a static copy
    # from being rendered next to the animation in the notebook.
    plt.close(fig)
    return anim


In [3]:
from IPython.display import HTML

# Build the animation from the default starting point, then embed it as an
# HTML5 video; the HTML object is the cell's final expression.
anim = animate_optimizers()
HTML(anim.to_html5_video())