# Non-Convex Optimization 2d

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from scipy.optimize import minimize
from ipywidgets import Dropdown
from IPython.display import HTML
%matplotlib inline

###  Function to Optimize

In [None]:
# Define the one-dimensional non-convex objective
def non_convex_function(x):
    """Evaluate the objective f(x) = x^4 - 3x^3 + 2.

    Works element-wise on NumPy arrays (as used for plotting) and on
    plain scalars (as used by the optimizers).
    """
    quartic_term = x**4
    cubic_term = 3*x**3
    return quartic_term - cubic_term + 2

# Define the analytic derivative of the one-dimensional objective
def gradient_non_convex(x):
    """Evaluate f'(x) = 4x^3 - 9x^2, the derivative of x^4 - 3x^3 + 2.

    Works element-wise on NumPy arrays and on plain scalars.
    """
    cubic_term = 4*x**3
    quadratic_term = 9*x**2
    return cubic_term - quadratic_term

# Sample the objective at 100 evenly spaced points on [-2, 3]
x = np.linspace(-2, 3, num=100)
y = non_convex_function(x)

# Draw the sampled curve as a static reference plot
plt.figure(figsize=(10, 6))
plt.plot(x, y, label='Non-Convex Function')
plt.grid(True)
plt.legend()
plt.ylabel('f(x)')
plt.xlabel('x')
plt.title('One-dimensional Non-Convex Function')
plt.show()

###  SGD Optimizer Variants

In [None]:
# Optimization algorithms
def sgd(func, gradient, x0, learning_rate=0.01, max_iter=100):
    """Run fixed-step gradient descent and record every iterate.

    Args:
        func: Objective function. Accepted for a uniform optimizer
            signature; it is never called here.
        gradient: Callable returning the gradient at a point.
        x0: Starting point.
        learning_rate: Step size applied to each gradient.
        max_iter: Number of update steps to take.

    Returns:
        NumPy array holding x0 followed by the point after each step
        (max_iter + 1 entries).
    """
    path = [x0]
    for _ in range(max_iter):
        current = path[-1]
        # Plain gradient step: move against the gradient
        path.append(current - learning_rate * gradient(current))
    return np.array(path)

def adagrad(func, gradient, x0, learning_rate=0.1, epsilon=1e-8, max_iter=100):
    """Run Adagrad and record every iterate.

    Args:
        func: Objective function. Accepted for a uniform optimizer
            signature; it is never called here.
        gradient: Callable returning the gradient at a point.
        x0: Starting point.
        learning_rate: Base step size before adaptive scaling.
        epsilon: Small constant added to the denominator to avoid
            division by zero.
        max_iter: Number of update steps to take.

    Returns:
        NumPy array holding x0 followed by the point after each step
        (max_iter + 1 entries).
    """
    position = x0
    squared_grad_sum = 0
    history = [position]
    for _ in range(max_iter):
        grad = gradient(position)
        # The running sum of squared gradients only grows, so the
        # effective step size shrinks over time
        squared_grad_sum = squared_grad_sum + grad ** 2
        scaled_step = learning_rate * grad / (np.sqrt(squared_grad_sum) + epsilon)
        position = position - scaled_step
        history.append(position)
    return np.array(history)

def rmsprop(func, gradient, x0, learning_rate=0.1, beta=0.9, epsilon=1e-8, max_iter=100):
    """Run RMSProp and record every iterate.

    Args:
        func: Objective function. Accepted for a uniform optimizer
            signature; it is never called here.
        gradient: Callable returning the gradient at a point.
        x0: Starting point.
        learning_rate: Base step size before adaptive scaling.
        beta: Decay factor of the squared-gradient moving average.
        epsilon: Small constant added to the denominator to avoid
            division by zero.
        max_iter: Number of update steps to take.

    Returns:
        NumPy array holding x0 followed by the point after each step
        (max_iter + 1 entries).
    """
    position = x0
    mean_square = 0
    history = [position]
    for _ in range(max_iter):
        grad = gradient(position)
        # Exponentially decayed average of squared gradients
        mean_square = beta * mean_square + (1 - beta) * (grad ** 2)
        scaled_step = learning_rate * grad / (np.sqrt(mean_square) + epsilon)
        position = position - scaled_step
        history.append(position)
    return np.array(history)

def adam(func, gradient, x0, learning_rate=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8, max_iter=100):
    """Run Adam and record every iterate.

    Args:
        func: Objective function. Accepted for a uniform optimizer
            signature; it is never called here.
        gradient: Callable returning the gradient at a point.
        x0: Starting point.
        learning_rate: Base step size before adaptive scaling.
        beta1: Decay factor of the first-moment (gradient) average.
        beta2: Decay factor of the second-moment (squared gradient) average.
        epsilon: Small constant added to the denominator to avoid
            division by zero.
        max_iter: Number of update steps to take.

    Returns:
        NumPy array holding x0 followed by the point after each step
        (max_iter + 1 entries).
    """
    position = x0
    first_moment = 0
    second_moment = 0
    history = [position]
    # Steps are counted from 1 so the bias-correction denominators
    # (1 - beta ** step) are non-zero on the very first update
    for step in range(1, max_iter + 1):
        grad = gradient(position)
        first_moment = beta1 * first_moment + (1 - beta1) * grad
        second_moment = beta2 * second_moment + (1 - beta2) * (grad ** 2)
        # Both averages start at zero; rescale to undo that bias
        first_unbiased = first_moment / (1 - beta1 ** step)
        second_unbiased = second_moment / (1 - beta2 ** step)
        position = position - learning_rate * first_unbiased / (np.sqrt(second_unbiased) + epsilon)
        history.append(position)
    return np.array(history)

### Optimization

In [None]:
# Shared starting point for every optimizer
x0 = -1.5

# Run SGD, Adagrad, RMSProp and Adam (in that order) from the same start,
# each with its own default hyperparameters
sgd_trajectory, adagrad_trajectory, rmsprop_trajectory, adam_trajectory = [
    run(non_convex_function, gradient_non_convex, x0)
    for run in (sgd, adagrad, rmsprop, adam)
]

### Animation

In [None]:
# Sample the objective on [-2, 3] for the background curve
x = np.linspace(-2, 3, 100)
y = non_convex_function(x)

# Set up the figure; all styling goes through the Axes object so each
# label, grid and legend is configured exactly once
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, label='Non-Convex Function')
ax.set_title('Non-Convex Optimization')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.grid(True)

# Empty line artists, one per optimizer; the animation fills them in
sgd_line, = ax.plot([], [], label='SGD', lw=2)
adagrad_line, = ax.plot([], [], label='Adagrad', lw=2)
rmsprop_line, = ax.plot([], [], label='RMSProp', lw=2)
adam_line, = ax.plot([], [], label='Adam', lw=2)

# Build the legend after every labelled artist exists so all five
# entries are listed
ax.legend(loc='upper right')


# Initialize the animation
def init():
    """Clear all four trajectory lines and return them.

    Passed to FuncAnimation as ``init_func``; the returned tuple lists
    the artists that were reset.
    """
    trajectory_lines = (sgd_line, adagrad_line, rmsprop_line, adam_line)
    for artist in trajectory_lines:
        artist.set_data([], [])
    return trajectory_lines

# Update function for animation
def update(i, optimizer):
    """Draw the first ``i + 1`` points of one optimizer's trajectory.

    Args:
        i: Zero-based frame index.
        optimizer: One of 'SGD', 'Adagrad', 'RMSProp'; any other value
            selects the Adam trajectory.

    Returns:
        One-element tuple containing the line artist that was updated.
    """
    named_runs = {
        'SGD': (sgd_trajectory, sgd_line),
        'Adagrad': (adagrad_trajectory, adagrad_line),
        'RMSProp': (rmsprop_trajectory, rmsprop_line),
    }
    # Anything not listed above falls through to Adam
    trajectory, line = named_runs.get(optimizer, (adam_trajectory, adam_line))

    visited = trajectory[:i + 1]
    line.set_data(visited, non_convex_function(visited))
    return line,

# Pair each optimizer name with its own trajectory so every animation
# runs for exactly as many frames as that run recorded, rather than
# borrowing the SGD trajectory's length for all of them
trajectories = {
    'SGD': sgd_trajectory,
    'Adagrad': adagrad_trajectory,
    'RMSProp': rmsprop_trajectory,
    'Adam': adam_trajectory,
}

# Create one animation per optimizer on the shared figure
animations = {}
for optimizer, trajectory in trajectories.items():
    ani = FuncAnimation(fig, update, frames=len(trajectory), init_func=init, fargs=(optimizer,), blit=True)
    animations[optimizer] = ani

# Render each animation to a self-contained JavaScript/HTML player
animation_html = {k: v.to_jshtml() for k, v in animations.items()}

# Display the players one after another (must stay the cell's last expression)
HTML('\n'.join(animation_html.values()))
