# ADAM Simulations

## Parameters

In [2]:
from Solver.Solver import AdamMomentum
from Solver.NonlocalSolver import NonlocalSolverMomentumAdam
from sklearn.model_selection import ParameterGrid
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Adam hyperparameters swept in this notebook; ParameterGrid expands the
# dictionary into every (lr, beta1, beta2) combination.
param_grid = {
    'lr': [0.1, 0.01],
    'beta1': [0.9, 0.0],
    'beta2': [0.99, 0.999],
}
param_list = list(ParameterGrid(param_grid))

# Number of subplot columns used by the figures below (one per learning rate).
n_learning_rates = len(param_grid['lr'])

def dL(y):
    """Return 2 * (y - 4), the derivative of the quadratic loss (y - 4)**2.

    Passed to both solvers as their ``dL`` argument.
    """
    return 2 * (y - 4)


def f(x, y):
    """Return 0.0 for any input.

    Passed to the nonlocal solver as its ``f`` argument.
    NOTE(review): the role of ``f`` inside the solver is not visible here;
    presumably an extra right-hand-side term that is switched off -- confirm.
    """
    return 0.0


# [start, end] interval handed to the nonlocal solver as ``t_span``.
# NOTE(review): the start is 1e-12 rather than 0, presumably to avoid a
# singularity at t = 0 inside the solver -- confirm.
t = [1e-12, 10]

## Adam - Discrete

In [3]:
# Discrete Adam: one figure per tracked quantity (theta, m, v), one subplot
# column per learning rate, one trace per (beta1, beta2) combination.

# Iteration budget per learning rate. Both entries satisfy epochs * lr == 10.
# A lookup table (instead of an if/elif chain without a fallback) makes an
# unsupported learning rate fail loudly rather than reuse a stale `epochs`.
epochs_by_lr = {0.1: 100, 0.01: 1000}

subplot_titles = [f'Learning Rate = {lr}' for lr in param_grid['lr']]
fig_theta, fig_m, fig_v = (
    make_subplots(rows=1, cols=n_learning_rates, subplot_titles=subplot_titles)
    for _fig_index in range(3)
)

# Iterate over each learning rate in the parameter grid
for i, lr in enumerate(param_grid['lr']):

    # Keep only the parameter combinations that use the current learning rate
    filtered_params = [p for p in param_list if p['lr'] == lr]

    if lr not in epochs_by_lr:
        raise ValueError(f'No epoch budget defined for learning rate {lr}')
    epochs = epochs_by_lr[lr]

    for params in filtered_params:
        theta_initial = 1.0  # Initial value for theta

        print(f'\nAdam Configuration: {params}')

        # Run the discrete Adam iteration for this configuration
        solver = AdamMomentum(dL=dL, lr=lr, beta1=params['beta1'], beta2=params['beta2'], epochs=epochs)
        solver.solve(theta_initial=theta_initial)

        label = f"beta1={params['beta1']}, beta2={params['beta2']}"

        # (figure, recorded series, trace styling): theta is drawn as a line,
        # the two moment estimates as small markers.
        panels = (
            (fig_theta, solver.theta_result, dict(mode='lines')),
            (fig_m, solver.m_result, dict(mode='markers', marker=dict(size=3))),
            (fig_v, solver.v_result, dict(mode='markers', marker=dict(size=3))),
        )
        for fig, series, style in panels:
            fig.add_trace(go.Scatter(
                # Index by the series' own length so x and y always line up,
                # even if the solver records a different number of points
                # than `epochs`.
                x=list(range(len(series))),
                y=series,
                name=label,
                legendgroup=f'LR={lr}',
                **style,
            ), row=1, col=i+1)

# Titles, axis labels and size for the three figures. Only the theta and v
# figures use a fixed one-decimal tick format on the y axis.
figure_specs = (
    (fig_theta, 'Theta values', dict(tickformat=".1f", title_text="Theta_k")),
    (fig_m, 'First moment (m)', dict(title_text="m_k")),
    (fig_v, 'Second moment (v)', dict(tickformat=".1f", title_text="v_k")),
)
for fig, quantity, yaxis_options in figure_specs:
    fig.update_layout(
        title_text=f'{quantity} convergence trajectories for the Adam Optimizer',
        showlegend=True,
        width=1500,  # Width in pixels
        height=600   # Height in pixels
    )
    fig.update_xaxes(title_text="k")
    fig.update_yaxes(**yaxis_options)

# Show the figures
fig_theta.show()
fig_m.show()
fig_v.show()


Adam Configuration: {'beta1': 0.9, 'beta2': 0.99, 'lr': 0.1}
Epoch: 50, Error: 0.004799330898878296.
Epoch: 100, Error: 0.0021703457886625976.
Last epoch: 101, Error: 0.0021703457886625976.

Adam Configuration: {'beta1': 0.9, 'beta2': 0.999, 'lr': 0.1}
Epoch: 50, Error: 0.006239302049674045.
Epoch: 100, Error: 0.0011988458789149448.
Last epoch: 101, Error: 0.0011988458789149448.

Adam Configuration: {'beta1': 0.0, 'beta2': 0.99, 'lr': 0.1}
Epoch: 50, Error: 0.016781864495888144.
Epoch: 100, Error: 0.00020620802432569363.
Last epoch: 101, Error: 0.00020620802432569363.

Adam Configuration: {'beta1': 0.0, 'beta2': 0.999, 'lr': 0.1}
Epoch: 50, Error: 0.01684481567661056.
Epoch: 100, Error: 0.0003506600043072794.
Last epoch: 101, Error: 0.0003506600043072794.

Adam Configuration: {'beta1': 0.9, 'beta2': 0.99, 'lr': 0.01}
Epoch: 50, Error: 0.009498909793875132.
Epoch: 100, Error: 0.008722426780408243.
Epoch: 150, Error: 0.007945940231400606.
Epoch: 200, Error: 0.0071417128212911685.
Epoch:

## Nonlocal Adam

In [5]:
# Nonlocal continuous Adam: one figure per tracked quantity (theta, m, v),
# with one subplot column per learning rate.
lr_titles = [f'Learning Rate = {lr}' for lr in param_grid['lr']]
fig_theta = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=lr_titles)
fig_m = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=lr_titles)
fig_v = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=lr_titles)

# Figure titles
fig_theta.update_layout(title_text='Theta values convergence trajectories for the first-order nonlocal continuous Adam')
fig_m.update_layout(title_text='First moment (m) over time for the first-order nonlocal continuous Adam')
fig_v.update_layout(title_text='Second moment (v) over time for the first-order nonlocal continuous Adam')

for column, lr in enumerate(param_grid['lr'], start=1):
    group = f'LR={lr}'

    for params in param_list:
        # Only the configurations that use the current learning rate
        if params['lr'] != lr:
            continue

        print(f'\nNonlocal Continuous Adam Configuration: {params}')

        # Solve the nonlocal model over the module-level interval `t`
        solver = NonlocalSolverMomentumAdam(
            f=f,
            dL=dL,
            t_span=t,
            y0=np.array([1.0]),
            alpha=params['lr'],
            betas=[params['beta1'], params['beta2']],
        )
        t_values, y_values = solver.solve()

        label = f"beta1={params['beta1']}, beta2={params['beta2']}"

        # Theta trajectory; time is divided by the learning rate on the x axis
        theta_trace = go.Scatter(
            x=t_values/params['lr'],
            y=y_values,
            mode='lines',
            name=label,
            legendgroup=group,
        )
        fig_theta.add_trace(theta_trace, row=1, col=column)

        # solver.m / solver.v are iterated as indexable entries: element 0 is
        # divided by the learning rate for the x axis, element 1 is plotted
        # unchanged on the y axis.
        # NOTE(review): presumably (time, value) samples of the update's
        # numerator and denominator -- confirm against the solver.
        for fig, samples in ((fig_m, solver.m), (fig_v, solver.v)):
            moment_trace = go.Scatter(
                x=[entry[0]/params['lr'] for entry in samples],
                y=[entry[1] for entry in samples],
                mode='markers',
                marker=dict(size=3),
                name=label,
                legendgroup=group,
            )
            fig.add_trace(moment_trace, row=1, col=column)

# Axis labels and figure size; only the theta figure fixes the tick format
axis_specs = (
    (fig_theta, dict(tickformat=".1f", title_text="Theta(t)")),
    (fig_m, dict(title_text="m(t)")),
    (fig_v, dict(title_text="v(t)")),
)
for fig, yaxis_options in axis_specs:
    fig.update_xaxes(title_text="t/alpha")
    fig.update_yaxes(**yaxis_options)
    fig.update_layout(width=1500, height=600)  # size in pixels

# Show the figures
fig_theta.show()
fig_m.show()
fig_v.show()



Nonlocal Continuous Adam Configuration: {'beta1': 0.9, 'beta2': 0.99, 'lr': 0.1}
Iteration 0 advanced. Current error: 56.71356670224943.
Iteration 1 advanced. Current error: 12.43481649320467.
Iteration 2 advanced. Current error: 13.725138464781748.
Iteration 3 advanced. Current error: 6.874377826246943.
Iteration 4 advanced. Current error: 2.907268313448646.
Iteration 5 advanced. Current error: 2.773298844721809.
Iteration 6 advanced. Current error: 1.6314397148312525.
Iteration 7 advanced. Current error: 0.674724634408088.
Iteration 8 advanced. Current error: 0.3501578248265214.
Iteration 9 advanced. Current error: 0.27708083797784533.
Iteration 10 advanced. Current error: 0.19025884747308255.
Iteration 11 advanced. Current error: 0.10817504414696079.
Iteration 12 advanced. Current error: 0.05305947978287799.
Iteration 13 advanced. Current error: 0.024274155488693192.
Iteration 14 advanced. Current error: 0.012714939481826198.
Iteration 15 advanced. Current error: 0.0084049242177884

## Both Models Together

In [5]:
# Overlay of both models: discrete Adam (x markers) and nonlocal continuous
# Adam (lines), one figure per tracked quantity (theta, m, v) and one subplot
# column per learning rate.

# One color per (beta1, beta2) combination, shared by both models
config_colors = {
    (0.9, 0.99): 'blue',
    (0.9, 0.999): 'green',
    (0.0, 0.99): 'red',
    (0.0, 0.999): 'purple'
}

# Iteration budget per learning rate. A lookup table (instead of an if/elif
# chain without a fallback) makes an unsupported learning rate fail loudly
# rather than reuse a stale `epochs`.
epochs_by_lr = {0.1: 100, 0.01: 1000}

# Column count follows the parameter grid, like the other cells, instead of
# being hard-coded to 2.
subplot_titles = [f'Learning Rate = {lr}' for lr in param_grid['lr']]
fig_theta, fig_m, fig_v = (
    make_subplots(rows=1, cols=n_learning_rates, subplot_titles=subplot_titles)
    for _fig_index in range(3)
)

# Iterate over each learning rate in the parameter grid
for i, lr in enumerate(param_grid['lr']):

    # Keep only the parameter combinations that use the current learning rate
    filtered_params = [p for p in param_list if p['lr'] == lr]

    if lr not in epochs_by_lr:
        raise ValueError(f'No epoch budget defined for learning rate {lr}')
    epochs = epochs_by_lr[lr]

    # Continuous-time horizon matching the discrete run (t = k * lr). Kept in
    # a cell-local name so the module-level `t` read by the nonlocal-only cell
    # is not overwritten.
    t_span = [1e-12, epochs * lr]

    # Legend entries are emitted for the first column only; traces in later
    # columns share the same legendgroup.
    show_in_legend = (i == 0)

    # Adam simulations
    for params in filtered_params:
        theta_initial = 1.0  # Initial value for theta

        print(f'\nAdam Configuration: {params}')

        # Run the discrete Adam iteration for this configuration
        solver = AdamMomentum(dL=dL, lr=lr, beta1=params['beta1'], beta2=params['beta2'], epochs=epochs)
        solver.solve(theta_initial=theta_initial)

        # Determine color for the current configuration
        color = config_colors[(params['beta1'], params['beta2'])]

        adam_series = (
            (fig_theta, solver.theta_result),
            (fig_m, solver.m_result),
            (fig_v, solver.v_result),
        )
        for fig, series in adam_series:
            fig.add_trace(go.Scatter(
                # Index by the series' own length so x and y always line up,
                # even if the solver records a different number of points
                # than `epochs`.
                x=list(range(len(series))),
                y=series,
                mode='markers',
                marker=dict(symbol='x', size=4, line=dict(width=0.01), color=color),
                name=f'Adam beta1={params["beta1"]}, beta2={params["beta2"]}',
                legendgroup=f'Adam {params["beta1"]},{params["beta2"]}',
                showlegend=show_in_legend
            ), row=1, col=i+1)

    # Nonlocal Continuous Adam simulations
    for params in filtered_params:
        color = config_colors[(params['beta1'], params['beta2'])]

        print(f'\nNonlocal Continuous Adam Configuration: {params}')

        # Solve the nonlocal model over the horizon matching the discrete run
        solver_nonlocal = NonlocalSolverMomentumAdam(f=f, dL=dL, t_span=t_span, y0=np.array([1.0]), alpha=params['lr'],
                                                     betas=[params['beta1'], params['beta2']])
        t_values, y_values = solver_nonlocal.solve()

        # solver_nonlocal.m / .v are iterated as indexable entries: element 0
        # is divided by the learning rate for the x axis, element 1 is plotted
        # unchanged on the y axis.
        # NOTE(review): presumably (time, value) samples of the update's
        # numerator and denominator -- confirm against the solver.
        numerators = solver_nonlocal.m
        denominators = solver_nonlocal.v

        nonlocal_curves = (
            (fig_theta, t_values/params['lr'], y_values),
            (fig_m,
             [item[0]/params['lr'] for item in numerators],
             [item[1] for item in numerators]),
            (fig_v,
             [item[0]/params['lr'] for item in denominators],
             [item[1] for item in denominators]),
        )
        for fig, x_values, curve_values in nonlocal_curves:
            fig.add_trace(go.Scatter(
                x=x_values,  # time divided by the learning rate
                y=curve_values,
                mode='lines',
                line=dict(color=color),
                name=f'Nonlocal Adam beta1={params["beta1"]}, beta2={params["beta2"]}',
                legendgroup=f'Nonlocal Adam {params["beta1"]},{params["beta2"]}',
                showlegend=show_in_legend
            ), row=1, col=i+1)

# Titles and labels. The titles name both models, since every subplot overlays
# the discrete Adam markers and the nonlocal continuous Adam lines.
figure_specs = (
    (fig_theta,
     'Theta values convergence trajectories: Adam (markers) vs. Nonlocal Continuous Adam (lines)',
     dict(tickformat=".1f", title_text="Theta values")),
    (fig_m,
     'First moment (m) trajectories: Adam (markers) vs. Nonlocal Continuous Adam (lines)',
     dict(title_text="First moment values")),
    (fig_v,
     'Second moment (v) trajectories: Adam (markers) vs. Nonlocal Continuous Adam (lines)',
     dict(title_text="Second moment values")),
)
for fig, title, yaxis_options in figure_specs:
    fig.update_layout(
        title_text=title,
        showlegend=True,
        width=1500,
        height=600
    )
    fig.update_xaxes(title_text="t/alpha")
    fig.update_yaxes(**yaxis_options)

# Show the figures
fig_theta.show()
fig_m.show()
fig_v.show()


Adam Configuration: {'beta1': 0.9, 'beta2': 0.99, 'lr': 0.1}
Epoch: 50, Error: 0.004799330898878296.
Epoch: 100, Error: 0.0021703457886625976.
Last epoch: 101, Error: 0.0021703457886625976.

Adam Configuration: {'beta1': 0.9, 'beta2': 0.999, 'lr': 0.1}
Epoch: 50, Error: 0.006239302049674045.
Epoch: 100, Error: 0.0011988458789149448.
Last epoch: 101, Error: 0.0011988458789149448.

Adam Configuration: {'beta1': 0.0, 'beta2': 0.99, 'lr': 0.1}
Epoch: 50, Error: 0.016781864495888144.
Epoch: 100, Error: 0.00020620802432569363.
Last epoch: 101, Error: 0.00020620802432569363.

Adam Configuration: {'beta1': 0.0, 'beta2': 0.999, 'lr': 0.1}
Epoch: 50, Error: 0.01684481567661056.
Epoch: 100, Error: 0.0003506600043072794.
Last epoch: 101, Error: 0.0003506600043072794.

Nonlocal Continuous Adam Configuration: {'beta1': 0.9, 'beta2': 0.99, 'lr': 0.1}
Iteration 0 advanced. Current error: 56.71356670224943.
Iteration 1 advanced. Current error: 12.43481649320467.
Iteration 2 advanced. Current error: 13