In [10]:
from Solver.Solver import AdaGrad, RMSPropMomentum, AdamMomentum
from Solver.NonlocalSolver import NonlocalSolverAdaGrad, NonlocalSolverMomentumAdam, NonlocalSolverMomentumRMSProp
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Figure 1 - Loss

In [11]:
# Synthetic 1-D regression data: y = true_theta * x + Gaussian noise.
# The global NumPy RNG is seeded so every run draws the same samples.
np.random.seed(42)
samples = 1000
true_theta = 2
x = np.random.rand(samples)                # inputs drawn from [0, 1)
noise = np.random.normal(0, 0.1, samples)  # zero-mean noise, std 0.1
y = true_theta * x + noise

# Step sizes to compare; each one gets its own subplot
learning_rates = [0.1, 0.01]

# One row, two columns: a panel per learning rate, titled with its value
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=tuple(f"Learning Rate {rate}" for rate in learning_rates),
)

# Define the derivative of the MSE
def dL(theta_t):
    """Derivative of the MSE loss of the model ``theta * x`` with respect to theta.

    Reads the module-level arrays ``x`` and ``y``.

    Parameters
    ----------
    theta_t : scalar, 0-d ndarray, or iterable of scalars
        Parameter value(s) at which to evaluate the derivative.

    Returns
    -------
    A single value for a scalar / 0-d input; otherwise a 1-D ndarray with one
    derivative per element of ``theta_t``.
    """
    def _grad_at(theta):
        # d/dtheta mean((y - theta*x)^2) = -2 * mean((y - theta*x) * x)
        return -2 * np.mean((y - theta * x) * x)

    is_zero_dim_array = isinstance(theta_t, np.ndarray) and theta_t.ndim == 0
    if not (np.isscalar(theta_t) or is_zero_dim_array):
        # Anything else is iterated element by element
        return np.array([_grad_at(theta) for theta in theta_t])
    return _grad_at(theta_t)

# Define the MSE loss function
def mse(theta):
    """Mean squared error of the prediction ``theta * x`` against ``y``.

    Uses the module-level arrays ``x`` and ``y``.
    """
    residuals = y - theta * x
    return np.mean(residuals ** 2)

# Define the system dynamics function
def f(t, y):
    """Return 0.0 for every ``t`` and ``y``.

    This is the callable passed as ``f`` to the nonlocal solvers. It is a
    named ``def`` rather than a lambda bound to a name (PEP 8), so it shows
    up as ``f`` in tracebacks. Both arguments are accepted and ignored.
    """
    return 0.0

# One pass per learning rate / subplot: run the three discrete optimizers and
# the three nonlocal solvers, turn every trajectory into MSE values, then plot.
for idx, lr in enumerate(learning_rates):
    # Number of discrete epochs: 60 for lr == 0.1, 300 for any other value
    k = 60 if lr == 0.1 else 300

    # --- Discrete optimizers, all started from theta = 0 --------------------
    # Only the first item returned by solve() is used; it is iterated as the
    # sequence of parameter values.
    theta_initial = 0.0

    # AdaGrad Optimizer
    print(f'AdaGrad Optimizer (lr={lr})')
    adagrad_optimizer = AdaGrad(dL=dL, lr=lr, epochs=k)
    theta_result_adagrad, _, _ = adagrad_optimizer.solve(theta_initial)
    loss_values_adagrad = [mse(theta) for theta in theta_result_adagrad]

    # RMSPropMomentum Optimizer
    print(f'\nRMSProp Optimizer (lr={lr})')
    rmsprop_optimizer = RMSPropMomentum(dL=dL, lr=lr, beta=0.9, epochs=k)
    theta_result_rmsprop, _, _ = rmsprop_optimizer.solve(theta_initial)
    loss_values_rmsprop = [mse(theta) for theta in theta_result_rmsprop]

    # AdamMomentum Optimizer (its solve() returns four items, not three)
    print(f'\nAdam Optimizer (lr={lr})')
    adam_optimizer = AdamMomentum(dL=dL, lr=lr, beta1=0.9, beta2=0.99, epochs=k)
    theta_result_adam, _, _, _ = adam_optimizer.solve(theta_initial)
    loss_values_adam = [mse(theta) for theta in theta_result_adam]

    # --- Nonlocal solvers ----------------------------------------------------
    # They run over t in [1e-12, k * lr]; dividing t by lr when plotting puts
    # them on the same 0..k axis as the epoch index.
    # NOTE(review): the start time is 1e-12 rather than 0 — presumably to avoid
    # a singularity at t = 0; confirm against the solver implementation.

    # NonlocalSolverAdaGrad
    print(f'\nNonlocal AdaGrad Optimizer (lr={lr})')
    adagrad_nonlocal_solver = NonlocalSolverAdaGrad(f=f, dL=dL, t_span=[1e-12, k * lr],
                                                    y0=np.array([0.0]), alpha=lr)
    t_adagrad, y_adagrad = adagrad_nonlocal_solver.solve()
    loss_values_nonlocal_adagrad = [mse(theta) for theta in y_adagrad]

    # NonlocalSolverMomentumRMSProp
    print(f'\nNonlocal RMSProp Optimizer (lr={lr})')
    rmsprop_nonlocal_solver = NonlocalSolverMomentumRMSProp(f=f, dL=dL, t_span=[1e-12, k * lr],
                                                            y0=np.array([0.0]),
                                                            beta=0.9, alpha=lr)
    t_rmsprop, y_rmsprop = rmsprop_nonlocal_solver.solve()
    loss_values_nonlocal_rmsprop = [mse(theta) for theta in y_rmsprop]

    # NonlocalSolverMomentumAdam
    print(f'\nNonlocal Adam Optimizer (lr={lr})')
    adam_nonlocal_solver = NonlocalSolverMomentumAdam(f=f, dL=dL, t_span=[1e-12, k * lr],
                                                      y0=np.array([0.0]),
                                                      betas=[0.9, 0.99], alpha=lr)
    t_adam, y_adam = adam_nonlocal_solver.solve()
    loss_values_nonlocal_adam = [mse(theta) for theta in y_adam]

    # --- Plotting ------------------------------------------------------------
    # Discrete runs: markers at integer epoch indices.
    # Each row is (legend name, loss values, marker symbol, colour).
    discrete_runs = (
        ('AdaGrad', loss_values_adagrad, 'x', 'blue'),
        ('RMSProp', loss_values_rmsprop, 'diamond', 'green'),
        ('Adam', loss_values_adam, 'cross', 'red'),
    )
    for trace_name, trace_losses, trace_symbol, trace_color in discrete_runs:
        fig.add_trace(
            go.Scatter(
                x=list(range(len(trace_losses))),
                y=trace_losses,
                mode='markers',
                marker=dict(symbol=trace_symbol, size=4, line=dict(width=0.01), color=trace_color),
                name=trace_name,
                legendgroup=trace_name,
                showlegend=(idx == 0),  # Show legend only once
            ),
            row=1, col=idx + 1)

    # Nonlocal runs: lines in the colour of the matching discrete optimizer.
    # Each row is (legend name, solver times, loss values, colour).
    nonlocal_runs = (
        ('Nonlocal AdaGrad', t_adagrad, loss_values_nonlocal_adagrad, 'blue'),
        ('Nonlocal RMSProp', t_rmsprop, loss_values_nonlocal_rmsprop, 'green'),
        ('Nonlocal Adam', t_adam, loss_values_nonlocal_adam, 'red'),
    )
    for trace_name, trace_times, trace_losses, trace_color in nonlocal_runs:
        fig.add_trace(
            go.Scatter(
                x=trace_times / lr,
                y=trace_losses,
                mode='lines',
                line=dict(color=trace_color),
                name=trace_name,
                legendgroup=trace_name,
                showlegend=(idx == 0),  # Show legend only once
            ),
            row=1, col=idx + 1)

# Figure-level settings: title, fixed pixel size, legend shown
layout_options = dict(
    title='MSE Loss Convergence for Different Solvers',
    width=1500,   # pixels
    height=600,   # pixels
    showlegend=True,
)
fig.update_layout(**layout_options)

# Axis titles; loss ticks are formatted with one decimal place
fig.update_xaxes(title_text="k")
fig.update_yaxes(title_text="MSE Loss", tickformat=".1f")

# Render the finished figure
fig.show()

AdaGrad Optimizer (lr=0.1)
Epoch: 50, Error: 0.00990047559160212.
Last epoch: 61, Error: 0.008546257172085792.

RMSProp Optimizer (lr=0.1)
Epoch: 50, Error: 1.8282095837207635e-08.
Last epoch: 61, Error: 4.440892098500626e-16.

Adam Optimizer (lr=0.1)
Epoch: 50, Error: 0.018184111316941642.
Last epoch: 61, Error: 0.005854845256346275.

Nonlocal AdaGrad Optimizer (lr=0.1)
Iteration 0 advanced. Current error: 7.428292388552517.
Iteration 1 advanced. Current error: 3.3178003797929185.
Iteration 2 advanced. Current error: 1.435288526194245.
Iteration 3 advanced. Current error: 0.6098177413210666.
Iteration 4 advanced. Current error: 0.2575075751147127.
Iteration 5 advanced. Current error: 0.1089795709866404.
Iteration 6 advanced. Current error: 0.04649423649584891.
Iteration 7 advanced. Current error: 0.02007792873874675.
Iteration 8 advanced. Current error: 0.008798100954047977.
Iteration 9 advanced. Current error: 0.003915674996545342.
Iteration 10 advanced. Current error: 0.001769107206

## Figure 2 - Loss

In [12]:
# Synthetic 1-D regression data: y = true_theta * x + Gaussian noise.
# The global NumPy RNG is seeded so every run draws the same samples.
np.random.seed(42)
samples = 1000
true_theta = 2
x = np.random.rand(samples)                # inputs drawn from [0, 1)
noise = np.random.normal(0, 0.1, samples)  # zero-mean noise, std 0.1
y = true_theta * x + noise

# Step sizes to compare; each one gets its own subplot
learning_rates = [0.1, 0.01]

# One row, two columns: a panel per learning rate, titled with its value
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=tuple(f"Learning Rate {rate}" for rate in learning_rates),
)

# Define the derivative of the MSE
def dL(theta_t):
    """Derivative of the MSE loss of the model ``theta * x`` with respect to theta.

    Reads the module-level arrays ``x`` and ``y``.

    Parameters
    ----------
    theta_t : scalar, 0-d ndarray, or iterable of scalars
        Parameter value(s) at which to evaluate the derivative.

    Returns
    -------
    A single value for a scalar / 0-d input; otherwise a 1-D ndarray with one
    derivative per element of ``theta_t``.
    """
    def _grad_at(theta):
        # d/dtheta mean((y - theta*x)^2) = -2 * mean((y - theta*x) * x)
        return -2 * np.mean((y - theta * x) * x)

    is_zero_dim_array = isinstance(theta_t, np.ndarray) and theta_t.ndim == 0
    if not (np.isscalar(theta_t) or is_zero_dim_array):
        # Anything else is iterated element by element
        return np.array([_grad_at(theta) for theta in theta_t])
    return _grad_at(theta_t)

# Define the MSE loss function
def mse(theta):
    """Mean squared error of the prediction ``theta * x`` against ``y``.

    Uses the module-level arrays ``x`` and ``y``.
    """
    residuals = y - theta * x
    return np.mean(residuals ** 2)

# Define the system dynamics function
def f(t, y):
    """Return 0.0 for every ``t`` and ``y``.

    This is the callable passed as ``f`` to the nonlocal solvers. It is a
    named ``def`` rather than a lambda bound to a name (PEP 8), so it shows
    up as ``f`` in tracebacks. Both arguments are accepted and ignored.
    """
    return 0.0

# One pass per learning rate / subplot: run the three discrete optimizers and
# the three nonlocal solvers, turn every trajectory into MSE values, then plot.
for idx, lr in enumerate(learning_rates):
    # Number of discrete epochs: 80 for lr == 0.1, 500 for any other value
    k = 80 if lr == 0.1 else 500

    # --- Discrete optimizers, all started from theta = 0 --------------------
    # Only the first item returned by solve() is used; it is iterated as the
    # sequence of parameter values.
    theta_initial = 0.0

    # AdaGrad Optimizer
    print(f'AdaGrad Optimizer (lr={lr})')
    adagrad_optimizer = AdaGrad(dL=dL, lr=lr, epochs=k)
    theta_result_adagrad, _, _ = adagrad_optimizer.solve(theta_initial)
    loss_values_adagrad = [mse(theta) for theta in theta_result_adagrad]

    # RMSPropMomentum Optimizer
    print(f'\nRMSProp Optimizer (lr={lr})')
    rmsprop_optimizer = RMSPropMomentum(dL=dL, lr=lr, beta=0.99, epochs=k)
    theta_result_rmsprop, _, _ = rmsprop_optimizer.solve(theta_initial)
    loss_values_rmsprop = [mse(theta) for theta in theta_result_rmsprop]

    # AdamMomentum Optimizer (its solve() returns four items, not three)
    print(f'\nAdam Optimizer (lr={lr})')
    adam_optimizer = AdamMomentum(dL=dL, lr=lr, beta1=0.99, beta2=0.999, epochs=k)
    theta_result_adam, _, _, _ = adam_optimizer.solve(theta_initial)
    loss_values_adam = [mse(theta) for theta in theta_result_adam]

    # --- Nonlocal solvers ----------------------------------------------------
    # They run over t in [1e-12, k * lr]; dividing t by lr when plotting puts
    # them on the same 0..k axis as the epoch index.
    # NOTE(review): the start time is 1e-12 rather than 0 — presumably to avoid
    # a singularity at t = 0; confirm against the solver implementation.

    # NonlocalSolverAdaGrad
    print(f'\nNonlocal AdaGrad Optimizer (lr={lr})')
    adagrad_nonlocal_solver = NonlocalSolverAdaGrad(f=f, dL=dL, t_span=[1e-12, k * lr],
                                                    y0=np.array([0.0]), alpha=lr)
    t_adagrad, y_adagrad = adagrad_nonlocal_solver.solve()
    loss_values_nonlocal_adagrad = [mse(theta) for theta in y_adagrad]

    # NonlocalSolverMomentumRMSProp
    print(f'\nNonlocal RMSProp Optimizer (lr={lr})')
    rmsprop_nonlocal_solver = NonlocalSolverMomentumRMSProp(f=f, dL=dL, t_span=[1e-12, k * lr],
                                                            y0=np.array([0.0]),
                                                            beta=0.99, alpha=lr)
    t_rmsprop, y_rmsprop = rmsprop_nonlocal_solver.solve()
    loss_values_nonlocal_rmsprop = [mse(theta) for theta in y_rmsprop]

    # NonlocalSolverMomentumAdam
    print(f'\nNonlocal Adam Optimizer (lr={lr})')
    adam_nonlocal_solver = NonlocalSolverMomentumAdam(f=f, dL=dL, t_span=[1e-12, k * lr],
                                                      y0=np.array([0.0]),
                                                      betas=[0.99, 0.999], alpha=lr)
    t_adam, y_adam = adam_nonlocal_solver.solve()
    loss_values_nonlocal_adam = [mse(theta) for theta in y_adam]

    # --- Plotting ------------------------------------------------------------
    # Discrete runs: markers at integer epoch indices.
    # Each row is (legend name, loss values, marker symbol, colour).
    discrete_runs = (
        ('AdaGrad', loss_values_adagrad, 'x', 'blue'),
        ('RMSProp', loss_values_rmsprop, 'diamond', 'green'),
        ('Adam', loss_values_adam, 'cross', 'red'),
    )
    for trace_name, trace_losses, trace_symbol, trace_color in discrete_runs:
        fig.add_trace(
            go.Scatter(
                x=list(range(len(trace_losses))),
                y=trace_losses,
                mode='markers',
                marker=dict(symbol=trace_symbol, size=4, line=dict(width=0.01), color=trace_color),
                name=trace_name,
                legendgroup=trace_name,
                showlegend=(idx == 0),  # Show legend only once
            ),
            row=1, col=idx + 1)

    # Nonlocal runs: lines in the colour of the matching discrete optimizer.
    # Each row is (legend name, solver times, loss values, colour).
    nonlocal_runs = (
        ('Nonlocal AdaGrad', t_adagrad, loss_values_nonlocal_adagrad, 'blue'),
        ('Nonlocal RMSProp', t_rmsprop, loss_values_nonlocal_rmsprop, 'green'),
        ('Nonlocal Adam', t_adam, loss_values_nonlocal_adam, 'red'),
    )
    for trace_name, trace_times, trace_losses, trace_color in nonlocal_runs:
        fig.add_trace(
            go.Scatter(
                x=trace_times / lr,
                y=trace_losses,
                mode='lines',
                line=dict(color=trace_color),
                name=trace_name,
                legendgroup=trace_name,
                showlegend=(idx == 0),  # Show legend only once
            ),
            row=1, col=idx + 1)

# Figure-level settings: title, fixed pixel size, legend shown
layout_options = dict(
    title='MSE Loss Convergence for Different Solvers',
    width=1500,   # pixels
    height=600,   # pixels
    showlegend=True,
)
fig.update_layout(**layout_options)

# Axis titles; loss ticks are formatted with one decimal place
fig.update_xaxes(title_text="k")
fig.update_yaxes(title_text="MSE Loss", tickformat=".1f")

# Render the finished figure
fig.show()

AdaGrad Optimizer (lr=0.1)
Epoch: 50, Error: 0.00990047559160212.
Last epoch: 81, Error: 0.0066155266574350335.

RMSProp Optimizer (lr=0.1)
Epoch: 50, Error: 9.769962616701378e-15.
Last epoch: 81, Error: 0.0.

Adam Optimizer (lr=0.1)
Epoch: 50, Error: 0.011793465383705737.
Last epoch: 81, Error: 0.03737788456630664.

Nonlocal AdaGrad Optimizer (lr=0.1)
Iteration 0 advanced. Current error: 10.091582349389512.
Iteration 1 advanced. Current error: 4.383187962036377.
Iteration 2 advanced. Current error: 1.8123810018983866.
Iteration 3 advanced. Current error: 0.7282859203806222.
Iteration 4 advanced. Current error: 0.29042470135835324.
Iteration 5 advanced. Current error: 0.11687936481057892.
Iteration 6 advanced. Current error: 0.04805293014700324.
Iteration 7 advanced. Current error: 0.020315106518183253.
Iteration 8 advanced. Current error: 0.008832027541970903.
Iteration 9 advanced. Current error: 0.00392957657338599.
Iteration 10 advanced. Current error: 0.0017780142210741432.
Iterati