In [1]:
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# Function to calculate the derivative of a quadratic function
def gradient(x):
    return 2 * x

# Optimizer functions
def gradient_descent(learning_rate, num_iterations):
    x = 10  # Starting point
    path = [x]
    for i in range(num_iterations):
        x -= learning_rate * gradient(x)
        path.append(x)
    return path

def momentum(learning_rate, momentum_coef, num_iterations):
    x = 10  # Starting point
    v = 0
    path = [x]
    for i in range(num_iterations):
        v = momentum_coef * v - learning_rate * gradient(x)
        x += v
        path.append(x)
    return path

def nag(learning_rate, momentum_coef, num_iterations):
    x = 10  # Starting point
    v = 0
    path = [x]
    for i in range(num_iterations):
        lookahead_x = x + momentum_coef * v
        v = momentum_coef * v - learning_rate * gradient(lookahead_x)
        x += v
        path.append(x)
    return path

def adagrad(learning_rate, num_iterations):
    x = 10  # Starting point
    cache = 0
    path = [x]
    for i in range(num_iterations):
        g = gradient(x)
        cache += g ** 2
        adjusted_lr = learning_rate / (np.sqrt(cache) + 1e-8)
        x -= adjusted_lr * g
        path.append(x)
    return path

def rmsprop(learning_rate, decay_rate, num_iterations):
    x = 10  # Starting point
    cache = 0
    path = [x]
    for i in range(num_iterations):
        g = gradient(x)
        cache = decay_rate * cache + (1 - decay_rate) * g ** 2
        adjusted_lr = learning_rate / (np.sqrt(cache) + 1e-8)
        x -= adjusted_lr * g
        path.append(x)
    return path

def adam(learning_rate, beta1, beta2, num_iterations):
    x = 10  # Starting point
    m, v = 0, 0
    path = [x]
    for i in range(num_iterations):
        g = gradient(x)
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        m_hat = m / (1 - beta1 ** (i + 1))
        v_hat = v / (1 - beta2 ** (i + 1))
        x -= learning_rate * m_hat / (np.sqrt(v_hat) + 1e-8)
        path.append(x)
    return path

# Plotting function
def plot_convergence(optimizer, learning_rate, num_iterations, momentum_coef=0.9, decay_rate=0.9, beta1=0.9, beta2=0.999):
    plt.figure(figsize=(10, 6))
    
    if optimizer == "Gradient Descent":
        path = gradient_descent(learning_rate, num_iterations)
    elif optimizer == "Momentum":
        path = momentum(learning_rate, momentum_coef, num_iterations)
    elif optimizer == "NAG":
        path = nag(learning_rate, momentum_coef, num_iterations)
    elif optimizer == "Adagrad":
        path = adagrad(learning_rate, num_iterations)
    elif optimizer == "RMSprop":
        path = rmsprop(learning_rate, decay_rate, num_iterations)
    elif optimizer == "Adam":
        path = adam(learning_rate, beta1, beta2, num_iterations)
    
    plt.plot(path, label=optimizer)
    plt.xlabel('Iteration')
    plt.ylabel('Parameter Value')
    plt.title(f'{optimizer} Convergence')
    plt.grid(True)
    plt.legend()
    plt.show()

# Interactive widgets
optimizer_widget = widgets.Dropdown(
    options=["Gradient Descent", "Momentum", "NAG", "Adagrad", "RMSprop", "Adam"],
    value="Momentum",
    description="Optimizer:"
)

learning_rate_widget = widgets.FloatSlider(
    value=0.1,
    min=0.001,
    max=1.0,
    step=0.01,
    description="Learning Rate:"
)

num_iterations_widget = widgets.IntSlider(
    value=100,
    min=10,
    max=500,
    step=10,
    description="Iterations:"
)

momentum_widget = widgets.FloatSlider(
    value=0.9,
    min=0.5,
    max=0.99,
    step=0.01,
    description="Momentum:"
)

decay_rate_widget = widgets.FloatSlider(
    value=0.9,
    min=0.5,
    max=0.99,
    step=0.01,
    description="Decay Rate:"
)

beta1_widget = widgets.FloatSlider(
    value=0.9,
    min=0.5,
    max=0.99,
    step=0.01,
    description="Beta 1:"
)

beta2_widget = widgets.FloatSlider(
    value=0.999,
    min=0.5,
    max=0.999,
    step=0.01,
    description="Beta 2:"
)

ui = widgets.VBox([
    optimizer_widget, 
    learning_rate_widget, 
    num_iterations_widget,
    momentum_widget,
    decay_rate_widget,
    beta1_widget,
    beta2_widget
])

out = widgets.interactive_output(
    plot_convergence, 
    {
        "optimizer": optimizer_widget, 
        "learning_rate": learning_rate_widget, 
        "num_iterations": num_iterations_widget,
        "momentum_coef": momentum_widget,
        "decay_rate": decay_rate_widget,
        "beta1": beta1_widget,
        "beta2": beta2_widget
    }
)

display(ui, out)


VBox(children=(Dropdown(description='Optimizer:', index=1, options=('Gradient Descent', 'Momentum', 'NAG', 'Ad…

Output()

In [6]:

# Function to calculate the derivative of a quadratic function
def gradient(x):
    return x ** 2

In [7]:
gradient(4)

16