# RMSProp Simulations

## Parameters

In [1]:
from Solver.Solver import RMSPropMomentum
from Solver.NonlocalSolver import NonlocalSolverMomentumRMSProp
from sklearn.model_selection import ParameterGrid
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np


# Hyperparameter sweep: every (lr, beta) combination below is simulated.
param_grid = {
    'lr': [0.1, 0.01],
    'beta': [0.0, 0.9, 0.99],
}
# Flat list with one {'beta': ..., 'lr': ...} dictionary per combination.
param_list = [*ParameterGrid(param_grid)]
# The figures below use one subplot column per learning rate.
n_learning_rates = len(param_grid['lr'])

def dL(y):
    """Return 2 * (y - 4), i.e. the derivative of (y - 4)**2 with respect to y.

    Passed to both solvers as their ``dL`` argument.
    """
    return 2 * (y - 4)


def f(x, y):
    """Return 0.0 for any input; passed to the nonlocal solver as ``f``."""
    return 0.0


# Time window handed to the nonlocal solver as ``t_span``.
# NOTE(review): the window starts at 1e-12 rather than 0 -- presumably to avoid
# a singularity at t = 0; confirm against the solver.
t = [1e-12, 6]

## RMSProp - Discrete

In [2]:
# Discrete RMSProp sweep: one subplot column per learning rate and one trace per
# beta, for both the theta iterates and the squared gradients (v).
subplot_titles = [f'Learning Rate = {lr}' for lr in param_grid['lr']]
fig_theta = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=subplot_titles)
fig_v = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=subplot_titles)

theta_initial = 1.0  # Initial value for theta, shared by every run

# Iterate over each learning rate in the parameter grid
for i, lr in enumerate(param_grid['lr']):

    # Keep only the configurations that use the current learning rate
    filtered_params = [p for p in param_list if p['lr'] == lr]

    # Number of steps of size lr needed to span the horizon t[1]
    # (60 epochs for lr=0.1, 600 for lr=0.01). Deriving the value instead of
    # matching specific floats keeps `epochs` defined for any learning rate
    # that is added to param_grid.
    epochs = int(round(t[1] / lr))

    # NOTE(review): the x-axis assumes the solver stores exactly `epochs`
    # values; the log prints "Last epoch: 61" for epochs=60 -- confirm the
    # lengths of theta_result / v_result match.
    epoch_axis = list(range(epochs))

    # Iterate over the filtered parameter configurations
    for params in filtered_params:
        print(f'\nRMSProp Configuration: {params}')

        # Initialize and solve the RMSProp optimization problem with momentum
        solver = RMSPropMomentum(dL=dL, lr=lr, beta=params['beta'], epochs=epochs)
        solver.solve(theta_initial=theta_initial)

        label = f"beta={params['beta']}"  # Label for the legend

        # Theta trajectory, drawn as a line
        fig_theta.add_trace(go.Scatter(
            x=epoch_axis,            # X-axis: epoch numbers
            y=solver.theta_result,   # Y-axis: theta values over epochs
            mode='lines',
            name=label,
            legendgroup=f'LR={lr}',  # Group traces by learning rate
        ), row=1, col=i+1)

        # Squared gradients (v), drawn as small markers
        fig_v.add_trace(go.Scatter(
            x=epoch_axis,            # X-axis: epoch numbers
            y=solver.v_result,       # Y-axis: squared gradients (v) over epochs
            mode='markers',
            marker=dict(size=3),
            name=label,
            legendgroup=f'LR={lr}',  # Group traces by learning rate
        ), row=1, col=i+1)

# Title, legend, size (in pixels) and axes for the theta plot
fig_theta.update_layout(
    title_text='Theta values convergence trajectories for the RMSProp Optimizer',
    showlegend=True,
    width=1500,
    height=600,
)
fig_theta.update_xaxes(title_text="k")
fig_theta.update_yaxes(tickformat=".1f", title_text="Theta_k")

# Title, legend, size (in pixels) and axes for the squared gradients plot
fig_v.update_layout(
    title_text='Squared gradients convergence trajectories for the RMSProp Optimizer',
    showlegend=True,
    width=1500,
    height=600,
)
fig_v.update_xaxes(title_text="k")
fig_v.update_yaxes(title_text="v_k")

# Display the figures
fig_theta.show()
fig_v.show()



RMSProp Configuration: {'beta': 0.0, 'lr': 0.1}
Epoch: 50, Error: 0.09999999374841728.
Last epoch: 61, Error: 0.09999999374840973.

RMSProp Configuration: {'beta': 0.9, 'lr': 0.1}
Epoch: 50, Error: 0.001205871813758197.
Last epoch: 61, Error: 9.220941850074382e-06.

RMSProp Configuration: {'beta': 0.99, 'lr': 0.1}
Epoch: 50, Error: 1.6699867577685268e-07.
Last epoch: 61, Error: 4.342334047890972e-09.

RMSProp Configuration: {'beta': 0.0, 'lr': 0.01}
Epoch: 50, Error: 0.009999999980079721.
Epoch: 100, Error: 0.009999999975124352.
Epoch: 150, Error: 0.009999999966887607.
Epoch: 200, Error: 0.009999999950494942.
Epoch: 250, Error: 0.009999999901960877.
Epoch: 300, Error: 0.009999995000015804.
Epoch: 350, Error: 0.0099999942036475.
Epoch: 400, Error: 0.009999994203133244.
Epoch: 450, Error: 0.009999994202619877.
Epoch: 500, Error: 0.00999999420210651.
Epoch: 550, Error: 0.009999994201593143.
Epoch: 600, Error: 0.009999994201080664.
Last epoch: 601, Error: 0.009999994201080664.

RMSProp Co

## Nonlocal RMSProp

In [5]:
# Nonlocal continuous RMSProp sweep: one subplot column per learning rate and
# one trace per beta, for theta(t) and for the solver's v values.
titles = [f'Learning Rate = {lr}' for lr in param_grid['lr']]
fig_theta = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=titles)
fig_v = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=titles)

for col, lr in enumerate(param_grid['lr'], start=1):

    # Only the configurations that use the current learning rate
    for params in (p for p in param_list if p['lr'] == lr):
        print(f'\nNonlocal Continuous RMSProp Configuration: {params}')

        beta = params['beta']

        # Build and run the nonlocal continuous solver for this configuration
        solver = NonlocalSolverMomentumRMSProp(
            f=f,
            dL=dL,
            t_span=t,
            y0=np.array([1.0]),
            alpha=lr,
            beta=beta,
        )
        t_values, y_values = solver.solve()

        # Legend settings shared by the theta trace and the v trace
        legend_kwargs = dict(name=f"beta={beta}", legendgroup=f'LR={lr}')

        # Theta over time; time is divided by the learning rate
        theta_trace = go.Scatter(
            x=t_values / lr,
            y=y_values,
            mode='lines',
            **legend_kwargs,
        )
        fig_theta.add_trace(theta_trace, row=1, col=col)

        # solver.v holds entries whose first item is a time and whose second
        # item is the denominator value (v)
        v_history = solver.v
        v_trace = go.Scatter(
            x=[entry[0] / lr for entry in v_history],  # time divided by the learning rate
            y=[entry[1] for entry in v_history],       # denominator values (v)
            mode='markers',
            marker=dict(size=3),
            **legend_kwargs,
        )
        fig_v.add_trace(v_trace, row=1, col=col)

# Title, size (in pixels) and axes for the theta plot
fig_theta.update_layout(
    title_text='Theta values convergence trajectories for the first-order nonlocal continuous RMSProp',
    width=1500,
    height=600,
)
fig_theta.update_xaxes(title_text="t/alpha")
fig_theta.update_yaxes(tickformat=".1f", title_text="Theta(t)")

# Title, size (in pixels) and axes for the v plot
fig_v.update_layout(
    title_text='v over time for the first-order nonlocal continuous RMSProp',
    width=1500,
    height=600,
)
fig_v.update_xaxes(title_text="t/alpha")
fig_v.update_yaxes(title_text="v(t)")

# Display the figures
fig_theta.show()
fig_v.show()


Nonlocal Continuous RMSProp Configuration: {'beta': 0.0, 'lr': 0.1}
Iteration 0 advanced. Current error: 26.74703493432793.
Iteration 1 advanced. Current error: 12.474723345513507.
Iteration 2 advanced. Current error: 6.894801146818074.
Iteration 3 advanced. Current error: 6.446503367493363.
Iteration 4 advanced. Current error: 5.73921119443525.
Iteration 5 advanced. Current error: 4.144749209386298.
Iteration 6 advanced. Current error: 5.56415052746627.
Iteration 7 advanced. Current error: 2.865402202424801.
Iteration 8 advanced. Current error: 4.543959604146071.
Iteration 9 advanced. Current error: 2.9024624121090974.
Iteration 10 advanced. Current error: 3.5620110696007328.
Iteration 11 advanced. Current error: 2.561182111416388.
Iteration 12 advanced. Current error: 2.7876713697657522.
Iteration 13 advanced. Current error: 2.113604068047686.
Iteration 14 advanced. Current error: 2.880837981208569.
Iteration 15 advanced. Current error: 2.1548608917373486.
Iteration 16 advanced. Cur

## Both Models Together

In [5]:
# Overlay of both models: discrete RMSProp (markers) and nonlocal continuous
# RMSProp (lines), one subplot column per learning rate.

# Trace colour per beta value, so both models share a colour for the same beta
config_colors = {
    0.0: 'blue',
    0.9: 'green',
    0.99: 'red',
}

# One column per learning rate; uses n_learning_rates instead of a hard-coded 2
# so the figure stays in sync with param_grid
subplot_titles = [f'Learning Rate = {lr}' for lr in param_grid['lr']]
fig_theta = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=subplot_titles)
fig_v = make_subplots(rows=1, cols=n_learning_rates, subplot_titles=subplot_titles)

theta_initial = 1.0  # Initial value for theta, shared by every discrete run

# Iterate over each learning rate in the parameter grid
for i, lr in enumerate(param_grid['lr']):

    # Keep only the configurations that use the current learning rate
    filtered_params = [p for p in param_list if p['lr'] == lr]

    # Number of steps of size lr needed to span the horizon t[1]
    # (60 epochs for lr=0.1, 600 for lr=0.01); works for any learning rate.
    epochs = int(round(t[1] / lr))
    epoch_axis = list(range(epochs))

    # Time window covering exactly `epochs` steps of size lr. Kept in a local
    # name so the notebook-wide `t` used by other cells is not overwritten.
    t_span_lr = [t[0], epochs * lr]

    # RMSProp simulations
    for params in filtered_params:
        color = config_colors[params['beta']]
        marker_style = dict(symbol='x', size=4, line=dict(width=0.05), color=color)
        rmsprop_name = f'RMSProp beta={params["beta"]}'

        print(f'\nRMSProp Configuration: {params}')

        # Initialize and solve the RMSProp optimization problem with momentum
        solver = RMSPropMomentum(dL=dL, lr=lr, beta=params['beta'], epochs=epochs)
        solver.solve(theta_initial=theta_initial)

        # Theta values over epochs
        fig_theta.add_trace(go.Scatter(
            x=epoch_axis,
            y=solver.theta_result,
            mode='markers',
            marker=marker_style,
            name=rmsprop_name,
            legendgroup=rmsprop_name,
            showlegend=(i == 0)  # Show legend only once
        ), row=1, col=i+1)

        # Squared gradients (v) over epochs
        fig_v.add_trace(go.Scatter(
            x=epoch_axis,
            y=solver.v_result,
            mode='markers',
            marker=marker_style,
            name=rmsprop_name,
            legendgroup=rmsprop_name,
            showlegend=(i == 0)  # Show legend only once
        ), row=1, col=i+1)

    # Nonlocal Continuous RMSProp simulations
    for params in filtered_params:
        color = config_colors[params['beta']]
        nonlocal_name = f'Nonlocal RMSProp beta={params["beta"]}'

        print(f'\nNonlocal Continuous RMSProp Configuration: {params}')

        # Initialize and solve the nonlocal continuous RMSProp solver
        solver_nonlocal = NonlocalSolverMomentumRMSProp(
            f=f, dL=dL, t_span=t_span_lr, y0=np.array([1.0]),
            alpha=params['lr'], beta=params['beta'],
        )
        t_values, y_values = solver_nonlocal.solve()

        # Theta over time; time is divided by the learning rate so it shares
        # the x-axis with the epoch index of the discrete runs
        fig_theta.add_trace(go.Scatter(
            x=t_values / params['lr'],
            y=y_values,
            mode='lines',
            line=dict(color=color),
            name=nonlocal_name,
            legendgroup=nonlocal_name,
            showlegend=(i == 0)  # Show legend only once
        ), row=1, col=i+1)

        # Retrieve the denominator values from the solver
        denominators = solver_nonlocal.v

        # Denominator values (v) over time, on the same normalized time axis
        fig_v.add_trace(go.Scatter(
            x=[item[0] / params['lr'] for item in denominators],
            y=[item[1] for item in denominators],
            mode='lines',
            line=dict(color=color),
            name=nonlocal_name,
            legendgroup=nonlocal_name,
            showlegend=(i == 0)  # Show legend only once
        ), row=1, col=i+1)

# Title, legend, size (in pixels) and axes for the theta plot. The title names
# both models because both are drawn in this figure.
fig_theta.update_layout(
    title_text='Theta values convergence trajectories: RMSProp vs. Nonlocal Continuous RMSProp',
    showlegend=True,
    width=1500,
    height=600,
)
fig_theta.update_xaxes(title_text="t/alpha")
fig_theta.update_yaxes(tickformat=".1f", title_text="Theta values")

# Title, legend, size (in pixels) and axes for the squared gradients plot
fig_v.update_layout(
    title_text='Squared gradients convergence trajectories: RMSProp vs. Nonlocal Continuous RMSProp',
    showlegend=True,
    width=1500,
    height=600,
)
fig_v.update_xaxes(title_text="t/alpha")
fig_v.update_yaxes(title_text="v values")

# Display the figures
fig_theta.show()
fig_v.show()


RMSProp Configuration: {'beta': 0.0, 'lr': 0.1}
Epoch: 50, Error: 0.09999999374841728.
Last epoch: 61, Error: 0.09999999374840973.

RMSProp Configuration: {'beta': 0.9, 'lr': 0.1}
Epoch: 50, Error: 0.001205871813758197.
Last epoch: 61, Error: 9.220941850074382e-06.

RMSProp Configuration: {'beta': 0.99, 'lr': 0.1}
Epoch: 50, Error: 1.6699867577685268e-07.
Last epoch: 61, Error: 4.342334047890972e-09.

Nonlocal Continuous RMSProp Configuration: {'beta': 0.0, 'lr': 0.1}
Iteration 0 advanced. Current error: 26.74703493432793.
Iteration 1 advanced. Current error: 12.474723345513507.
Iteration 2 advanced. Current error: 6.894801146818074.
Iteration 3 advanced. Current error: 6.446503367493363.
Iteration 4 advanced. Current error: 5.73921119443525.
Iteration 5 advanced. Current error: 4.144749209386298.
Iteration 6 advanced. Current error: 5.56415052746627.
Iteration 7 advanced. Current error: 2.865402202424801.
Iteration 8 advanced. Current error: 4.543959604146071.
Iteration 9 advanced. C