Understanding different types of cost functions, and the effect of learning rate and initialization on the optimization process

In [1]:
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
from ipywidgets import interact

# --- Define cost functions ---
def f_multi(theta):
    """Non-convex 1-D cost: a parabola plus a sine ripple, θ² + 10·sin(θ).

    Accepts a scalar or a NumPy array and evaluates element-wise.
    """
    bowl = theta**2
    ripple = 10 * np.sin(theta)
    return bowl + ripple

def df_multi(theta):
    """Derivative of θ² + 10·sin(θ) with respect to θ: 2θ + 10·cos(θ)."""
    slope_of_bowl = 2 * theta
    slope_of_ripple = 10 * np.cos(theta)
    return slope_of_bowl + slope_of_ripple

def f_quad(theta):
    """Convex 1-D quadratic cost θ² (scalar or element-wise on an array)."""
    squared = theta**2
    return squared

def df_quad(theta):
    """Derivative of θ² with respect to θ, i.e. 2θ."""
    slope = 2 * theta
    return slope

def f_rastrigin(theta):
    """1-D Rastrigin cost: 10 + (θ² − 10·cos(2πθ)).

    Accepts a scalar or a NumPy array and evaluates element-wise.
    """
    angle = 2 * np.pi * theta
    return 10 + (theta**2 - 10 * np.cos(angle))

def df_rastrigin(theta):
    """Derivative of the 1-D Rastrigin cost: 2θ + 20π·sin(2πθ)."""
    angle = 2 * np.pi * theta
    oscillation = 20 * np.pi * np.sin(angle)
    return 2 * theta + oscillation

# --- Gradient Descent + Animation ---
def gradient_descent(func_type="Non-convex (θ²+10sinθ)", theta0=10.0, learning_rate=0.1, iterations=30):
    """Animate 1-D gradient descent on a selectable cost function.

    Starting from ``theta0``, applies ``θ ← θ − learning_rate · f'(θ)``
    ``iterations`` times, then displays a Plotly figure showing the cost
    curve on [-10, 10], a red marker for the current iterate, the dotted
    path travelled so far, and a blue arrow for the most recent step.

    Args:
        func_type: ``"Non-convex (θ²+10sinθ)"`` or ``"Quadratic (θ²)"``;
            any other value selects the Rastrigin function.
        theta0: Initial value of θ.
        learning_rate: Step size η.
        iterations: Number of update steps (one animation frame each).

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Pick function
    if func_type == "Non-convex (θ²+10sinθ)":
        f, df = f_multi, df_multi
        title_func = "f(θ)=(θ²+10sinθ)"
    elif func_type == "Quadratic (θ²)":
        f, df = f_quad, df_quad
        title_func = "f(θ)=θ²"
    else:
        f, df = f_rastrigin, df_rastrigin
        title_func = "f(θ)=10+(θ²-10cos(2πθ))"

    # Compute path
    x_list = [theta0]
    for _ in range(iterations):
        x_list.append(x_list[-1] - learning_rate * df(x_list[-1]))
    y_list = f(np.array(x_list))

    # Function curve range (the same for every cost function)
    theta = np.linspace(-10, 10, 1000)

    # Base figure: trace 0 = cost curve, trace 1 = current point, trace 2 = path.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=theta, y=f(theta), mode="lines", name="f(θ)"))
    fig.add_trace(go.Scatter(x=[x_list[0]], y=[y_list[0]],
                             mode="markers", marker=dict(color="red", size=10),
                             name="Current"))
    # Frames can only update traces that already exist in the figure, so the
    # path trace must be created here (holding just the start point);
    # otherwise the path supplied by each frame is never drawn.
    fig.add_trace(go.Scatter(x=x_list[:1], y=y_list[:1], mode="lines+markers",
                             line=dict(color="red", dash="dot"), showlegend=False))

    # Build animation frames. `traces=[1, 2]` maps the frame data onto the
    # marker and path traces only, so the static curve is not resent per frame.
    frames = [
        go.Frame(
            data=[
                go.Scatter(x=[x_list[i]], y=[y_list[i]], mode="markers",
                           marker=dict(color="red", size=10)),
                go.Scatter(x=x_list[:i+1], y=y_list[:i+1], mode="lines+markers",
                           line=dict(color="red", dash="dot"), showlegend=False),
            ],
            traces=[1, 2],
            layout=go.Layout(
                annotations=[
                    # Arrow from the previous iterate to the current one.
                    dict(
                        x=x_list[i], y=y_list[i],
                        ax=x_list[i-1], ay=y_list[i-1],
                        xref="x", yref="y", axref="x", ayref="y",
                        showarrow=True, arrowhead=3, arrowsize=1, arrowwidth=2,
                        arrowcolor="blue"
                    )
                ]
            ),
            name=str(i)
        )
        for i in range(1, len(x_list))
    ]

    # Layout with animation controls
    fig.update_layout(
        xaxis_title="θ",
        yaxis_title=title_func,
        font=dict(family="Times New Roman", size=14),
        margin=dict(t=120),  # head-room for the Play/Pause buttons
        updatemenus=[dict(
            type="buttons",
            showactive=False,
            x=0.5, y=1.2, xanchor="center", yanchor="top",
            direction="right",
            buttons=[
                dict(label="Play", method="animate",
                     args=[None, dict(frame=dict(duration=500, redraw=True),
                                      transition=dict(duration=200),
                                      fromcurrent=True, mode="immediate")]),
                dict(label="Pause", method="animate",
                     args=[[None], dict(mode="immediate")])
            ]
        )]
    )

    fig.frames = frames
    fig.show()

# --- Interactive controls ---
# Build the control panel for the 1-D demo: every widget change re-runs
# gradient_descent with the new values and redraws the figure.
interact(
    gradient_descent,
    # The option strings must match the func_type values tested inside
    # gradient_descent; "Rastrigin" is handled by its final `else` branch.
    func_type=widgets.ToggleButtons(
        options=["Non-convex (θ²+10sinθ)", "Quadratic (θ²)", "Rastrigin"],
        description="Function:"
    ),
    theta0=widgets.FloatSlider(value=10, min=-10, max=10, step=0.5, description="Initial θ₀"),
    # Log-scale slider: min/max/step are exponents of `base`, so η spans 10⁻³ … 10⁰.
    learning_rate=widgets.FloatLogSlider(value=0.1, base=10, min=-3, max=0, step=0.1, description="η (lr)"),
    iterations=widgets.IntSlider(value=30, min=5, max=200, step=5, description="Iterations")
);


interactive(children=(ToggleButtons(description='Function:', options=('Non-convex (θ²+10sinθ)', 'Quadratic (θ²…

# 2D

In [None]:
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
from ipywidgets import interact

# --- Define cost functions in 2D ---
def f_quad(theta1, theta2):
    """Convex 2-D quadratic bowl: θ₁² + θ₂².

    NOTE: this reuses the name of the one-argument ``f_quad`` from the 1-D
    cell; when both cells run in the same namespace this definition replaces it.
    """
    sq1, sq2 = theta1**2, theta2**2
    return sq1 + sq2

def grad_quad(theta1, theta2):
    """Gradient of θ₁² + θ₂², returned as the tuple (2θ₁, 2θ₂)."""
    return tuple(2 * component for component in (theta1, theta2))

def f_multi(theta1, theta2):
    """Non-convex 2-D cost: θ₁² + θ₂² + 10·sin(θ₁) + 10·sin(θ₂).

    NOTE: this reuses the name of the one-argument ``f_multi`` from the 1-D
    cell; when both cells run in the same namespace this definition replaces it.
    """
    bowl = theta1**2 + theta2**2
    return bowl + 10 * np.sin(theta1) + 10 * np.sin(theta2)

def grad_multi(theta1, theta2):
    """Gradient of the non-convex 2-D cost: (2θ₁ + 10·cos θ₁, 2θ₂ + 10·cos θ₂)."""
    return tuple(2 * t + 10 * np.cos(t) for t in (theta1, theta2))

def f_rastrigin(theta1, theta2):
    """2-D Rastrigin cost: 20 + Σᵢ (θᵢ² − 10·cos(2πθᵢ)) for i = 1, 2.

    NOTE: this reuses the name of the one-argument ``f_rastrigin`` from the
    1-D cell; when both cells run in the same namespace this definition
    replaces it.
    """
    # One (θ² − 10·cos(2πθ)) term per coordinate.
    term1 = theta1**2 - 10 * np.cos(2 * np.pi * theta1)
    term2 = theta2**2 - 10 * np.cos(2 * np.pi * theta2)
    return 20 + term1 + term2

def grad_rastrigin(theta1, theta2):
    """Gradient of the 2-D Rastrigin cost.

    Each component is 2θᵢ + 20π·sin(2πθᵢ); the result is the tuple
    (∂f/∂θ₁, ∂f/∂θ₂).
    """
    return tuple(
        2 * t + 20 * np.pi * np.sin(2 * np.pi * t)
        for t in (theta1, theta2)
    )

# --- Gradient descent with animation ---
def gradient_descent_2d(func_type="Quadratic", theta0_1=5.0, theta0_2=5.0, learning_rate=0.05, iterations=30):
    """Animate gradient descent on a selectable two-parameter cost surface.

    Starting from ``(theta0_1, theta0_2)``, applies
    ``θ ← θ − learning_rate · ∇f(θ)`` ``iterations`` times, then displays a
    3-D Plotly figure with the cost surface, a red marker for the current
    iterate and the dotted path travelled so far.

    Cost functions:
        * ``"Quadratic"``  – θ₁² + θ₂²
        * ``"Non-convex"`` – θ₁² + θ₂² + 10·sin θ₁ + 10·sin θ₂
        * anything else    – Rastrigin, 20 + Σᵢ (θᵢ² − 10·cos(2πθᵢ))

    Args:
        func_type: Name of the cost function (see above).
        theta0_1: Initial value of θ₁.
        theta0_2: Initial value of θ₂.
        learning_rate: Step size η.
        iterations: Number of update steps (one animation frame each).

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Select function; `lim` is the half-width of the plotted square [-lim, lim]².
    if func_type == "Quadratic":
        f, grad, lim = f_quad, grad_quad, 6
    elif func_type == "Non-convex":
        f, grad, lim = f_multi, grad_multi, 6
    else:
        f, grad, lim = f_rastrigin, grad_rastrigin, 5.5

    # Gradient descent path
    t1, t2 = theta0_1, theta0_2
    path_t1, path_t2, path_z = [t1], [t2], [f(t1, t2)]
    for _ in range(iterations):
        g1, g2 = grad(t1, t2)
        t1 -= learning_rate * g1
        t2 -= learning_rate * g2
        path_t1.append(t1)
        path_t2.append(t2)
        path_z.append(f(t1, t2))

    # Create grid for surface
    t1_vals = np.linspace(-lim, lim, 100)
    t2_vals = np.linspace(-lim, lim, 100)
    T1, T2 = np.meshgrid(t1_vals, t2_vals)
    Z = f(T1, T2)

    # Base plot: trace 0 = surface, trace 1 = current point, trace 2 = path.
    fig = go.Figure(data=[go.Surface(x=T1, y=T2, z=Z, colorscale="Viridis", opacity=0.8)])
    fig.add_trace(go.Scatter3d(x=[path_t1[0]], y=[path_t2[0]], z=[path_z[0]],
                               mode="markers", marker=dict(color="red", size=5), name="Current"))
    # Frames can only update traces that already exist in the figure, so the
    # path trace must be created here (holding just the start point);
    # otherwise the path supplied by each frame is never drawn.
    fig.add_trace(go.Scatter3d(x=path_t1[:1], y=path_t2[:1], z=path_z[:1],
                               mode="lines", line=dict(color="red", dash="dot"), showlegend=False))

    # Frames. `traces=[1, 2]` maps the frame data onto the marker and path
    # traces only, so the 100x100 surface is not resent with every frame and
    # its colour bar stays unchanged during playback.
    frames = [
        go.Frame(
            data=[
                go.Scatter3d(x=[path_t1[i]], y=[path_t2[i]], z=[path_z[i]],
                             mode="markers", marker=dict(color="red", size=5)),
                go.Scatter3d(x=path_t1[:i+1], y=path_t2[:i+1], z=path_z[:i+1],
                             mode="lines", line=dict(color="red", dash="dot"), showlegend=False),
            ],
            traces=[1, 2],
            name=str(i)
        )
        for i in range(1, len(path_t1))
    ]

    fig.update_layout(
        width=600,   # set figure width
        height=600,  # set figure height
        scene=dict(
            # Plain Unicode labels: `$...$` LaTeX is not typeset in 3-D scene
            # axis titles and would be shown verbatim.
            xaxis_title="θ₁",
            yaxis_title="θ₂",
            zaxis_title="f(θ₁, θ₂)"
        ),
        font=dict(family="Times New Roman", size=14),
        margin=dict(t=120),  # head-room for the Play/Pause buttons
        updatemenus=[dict(
            type="buttons",
            x=0.5, y=1.15, xanchor="center", yanchor="top",
            direction="right",
            buttons=[
                dict(label="Play", method="animate",
                     args=[None, dict(frame=dict(duration=500, redraw=True),
                                      transition=dict(duration=200),
                                      fromcurrent=True, mode="immediate")]),
                dict(label="Pause", method="animate",
                     args=[[None], dict(mode="immediate")])
            ]
        )]
    )

    fig.frames = frames
    fig.show()

# --- Interactive controls ---
# Build the control panel for the 2-D demo: every widget change re-runs
# gradient_descent_2d with the new values and redraws the figure.
interact(
    gradient_descent_2d,
    # The option strings must match the func_type values tested inside
    # gradient_descent_2d; "Rastrigin" is handled by its final `else` branch.
    func_type=widgets.ToggleButtons(
        options=["Quadratic", "Non-convex", "Rastrigin"],
        description="Function:"
    ),
    # Start-point sliders cover [-6, 6] on each axis.
    theta0_1=widgets.FloatSlider(value=5, min=-6, max=6, step=0.5, description="Initial θ₁"),
    theta0_2=widgets.FloatSlider(value=5, min=-6, max=6, step=0.5, description="Initial θ₂"),
    # Log-scale slider: min/max/step are exponents of `base`, so η spans 10⁻³ … 10⁰.
    learning_rate=widgets.FloatLogSlider(value=0.05, base=10, min=-3, max=0, step=0.1, description="η (lr)"),
    iterations=widgets.IntSlider(value=30, min=5, max=200, step=5, description="Iterations")
);


interactive(children=(ToggleButtons(description='Function:', options=('Quadratic', 'Non-convex', 'Rastrigin'),…