# Incremental Dual Heuristic Programming (IDHP) Hyperparameters Analysis

The focus will primarily be on three fundamental hyperparameters:

- Learning Rates: The step sizes of the actor and critic updates, which control how fast the agent learns. A low learning rate may require more iterations to converge, while a high learning rate might cause the algorithm to overshoot the optimal solution or make learning unstable.
- Discount Factor (Gamma): The discount factor determines how much future rewards are taken into consideration for decision-making in the present step. A low discount factor will make the agent "short-sighted," favoring immediate rewards, while a high discount factor places more value on long-term rewards.
- Tracking Tasks: We will define different tasks for our RL agent to follow, which will alter the training environment and therefore the learned policy. This will allow us to examine how our algorithm performs under different conditions and objectives.


## Setup
Import the necessary packages and set the default styles.

In [1]:
from visualization.utils import make_defaults
from visualization.utils import defaults as d
from helpers.paths import Path
import pandas as pd
import plotly.express as px

# Apply the project defaults from visualization.utils before any figure is built
# (presumably plot styling, per the Setup note above — confirm in visualization.utils).
make_defaults()

## Load and Prepare Data

In [43]:
# Read the four sweep exports from the project data directory.
# Note: every file name carries the "idhp-sac-hyperparams" prefix, including the DSAC sweeps.
data_dir = Path().data

# Sweeps varying both the actor and the critic learning rate.
df_sac_lr = pd.read_csv(data_dir / 'idhp-sac-hyperparams_z3xgpv6z.csv')
df_dsac_lr = pd.read_csv(data_dir / 'idhp-sac-hyperparams_050zos9u.csv')

# Sweeps varying the actor learning rate only.
df_sac_actor = pd.read_csv(data_dir / 'idhp-sac-hyperparams_qnrug81d.csv')
df_dsac_actor = pd.read_csv(data_dir / 'idhp-sac-hyperparams_98h75wwb.csv')

In [3]:
# Preview the first rows of the SAC learning-rate sweep to check its columns and value ranges
df_sac_lr.head()

Unnamed: 0.1,Unnamed: 0,name,lr_a_high,lr_c_high,sac_model,task_train,discount_factor,discount_factor_model,_step,_wandb,_runtime,sac_nmae,idhp_nmae,_timestamp,nmae_improvement
0,0,elated-sweep-200,1e-05,0.8,SAC-citation/divine-grass-171,exp1_pseudo_random_sin,0.6,0.6,0,{'runtime': 1},2.034993,16.011297,10000000000.0,1685777000.0,-10000000000.0
1,1,dutiful-sweep-199,0.01,1e-05,SAC-citation/divine-grass-171,exp1_pseudo_random_sin,0.6,0.6,0,{'runtime': 8},9.052726,16.011297,20.17904,1685777000.0,-4.167744
2,2,upbeat-sweep-198,0.7,0.4,SAC-citation/divine-grass-171,exp1_pseudo_random_sin,0.6,0.6,0,{'runtime': 1},1.9613,16.011297,10000000000.0,1685777000.0,-10000000000.0
3,3,winter-sweep-197,0.0005,0.0001,SAC-citation/firm-feather-173,exp1_pseudo_random_sin,0.6,0.6,0,{'runtime': 8},8.994734,20.585378,32.90584,1685777000.0,-12.32046
4,4,worldly-sweep-196,0.0001,0.0001,SAC-citation/divine-grass-171,exp1_pseudo_random_sin,0.6,0.6,0,{'runtime': 8},9.036116,16.011297,105.6962,1685777000.0,-89.68489


In [4]:
# Preview the first rows of the DSAC learning-rate sweep as well, for comparison with the SAC export
df_dsac_lr.head()

Unnamed: 0.1,Unnamed: 0,name,agent,lr_a_high,lr_c_high,sac_model,task_train,discount_factor,discount_factor_model,idhp_nmae,_timestamp,nmae_improvement,_step,_wandb,_runtime,sac_nmae
0,0,laced-sweep-200,IDHPDSAC,0.8,0.1,DSAC-citation/desert-fog-33,exp1_pseudo_random_sin,0.6,0.6,8.816165,1685809000.0,16.87222,0,{'runtime': 5},6.117897,25.688384
1,1,youthful-sweep-199,IDHPDSAC,0.001,0.2,DSAC-citation/smart-durian-34,exp1_pseudo_random_sin,0.6,0.6,39.30182,1685809000.0,-33.25755,0,{'runtime': 5},5.770966,6.044273
2,2,hardy-sweep-198,IDHPDSAC,0.7,0.001,DSAC-citation/smart-durian-34,exp1_pseudo_random_sin,0.6,0.6,10.43193,1685809000.0,-4.387662,0,{'runtime': 5},5.76166,6.044273
3,3,major-sweep-197,IDHPDSAC,1e-05,0.3,DSAC-citation/vague-hill-35,exp1_pseudo_random_sin,0.6,0.6,,1685809000.0,,0,{'runtime': 3},4.194715,25.824806
4,4,lemon-sweep-196,IDHPDSAC,0.4,0.8,DSAC-citation/desert-fog-33,exp1_pseudo_random_sin,0.6,0.6,10000000000.0,1685809000.0,-10000000000.0,0,{'runtime': 0},1.291676,25.688384


In [44]:
# Coerce the learning-rate and nMAE columns to numeric dtypes, then add a 0/1 `failed` flag.
numeric_cols = ['lr_a_high', 'lr_c_high', 'nmae_improvement', 'sac_nmae', 'idhp_nmae']
for df in (df_sac_lr, df_dsac_lr, df_sac_actor, df_dsac_actor):
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col])
    # Failed runs show up in the previews with nmae_improvement == -1e10, so any value
    # below -1e6 is treated as a failure.
    # NOTE(review): rows with NaN nmae_improvement compare False here and are therefore
    # flagged as not failed — confirm this is intended.
    df["failed"] = df['nmae_improvement'].lt(-1e6).astype(int)

# Plot Experiments: Changing the Learning Rate

In [53]:
# Shared layout for the learning-rate figures: fixed canvas size, project font,
# a compact Yes/No colorbar for the binary `failed` flag, and tight margins.
actor_critic_lr_layout = {
    "autosize": False,  # fixed size; width and height below are in pixels
    "width": d.width,
    "height": 400,
    "font": d.font,
    "coloraxis_colorbar": {
        "title": "Failure",
        "tickvals": [1, 0],
        "ticktext": ["Yes", "No"],
        "lenmode": "pixels",
        "len": 100,
    },
    # Trim the white space around the plot (left, right, bottom, top).
    "margin": {"l": 30, "r": 2, "b": 10, "t": 45},
}

## Changing actor and critic learning rates at the same time

In [57]:
# Select the sweep to plot: True -> IDHP-SAC runs, False -> IDHP-DSAC runs.
IS_SAC = False
# Set to True to export the figure as a PDF into the paper figures directory.
SAVE_FIGURE = False

if IS_SAC:
    df = df_sac_lr
else:
    df = df_dsac_lr

# One axis per learning rate plus the 0/1 `failed` flag, which also colours the lines.
plot_columns = ['lr_a_high', 'lr_c_high', 'failed']
axis_labels = {
    'lr_a_high': "η Actor",
    'lr_c_high': "η Critic",
    'failed': 'Failure',
}

fig = px.parallel_coordinates(
    df[plot_columns],
    color='failed',
    labels=axis_labels,
    color_continuous_scale=px.colors.sequential.Bluered,
    color_continuous_midpoint=0.5,  # centre the scale between the two flag values 0 and 1
    template="seaborn",
)
fig.update_layout(**actor_critic_lr_layout)
fig.show()

if SAVE_FIGURE:
    fig.write_image(Path().paper_figures / f"idhp-{'sac' if IS_SAC else 'dsac'}-hyperparams.pdf")

## Changing actor learning rate only

In [52]:
# Select the sweep to plot: True -> IDHP-SAC runs, False -> IDHP-DSAC runs.
IS_SAC = False
# Set to True to export the figure as a PDF into the paper figures directory.
SAVE_FIGURE = False

# Keep only runs with a negative nMAE improvement. In the previewed rows of the
# learning-rate sweeps nmae_improvement equals sac_nmae - idhp_nmae, so these are
# the runs where IDHP ended with a higher nMAE than the offline agent.
# NOTE(review): this filter also keeps failed runs (nmae_improvement around -1e10),
# if the actor-only sweeps contain any; they would stretch the nMAE axis — confirm.
df_actor = df_sac_actor if IS_SAC else df_dsac_actor
df = df_actor[df_actor['nmae_improvement'] < 0]

# The colour axis is pinned to [-200, 0] via range_color. No color_continuous_midpoint
# is passed: Plotly ignores the midpoint once both ends of the colour range are fixed,
# and the previous value (0.5) was outside this range anyway.
fig = px.parallel_coordinates(
    df[['lr_a_high', 'nmae_improvement']],
    color='nmae_improvement',
    labels={
        'lr_a_high': "η Actor",
        'nmae_improvement': "nMAE Improvement",
    },
    color_continuous_scale=px.colors.sequential.Bluered[::-1],
    range_color=[-200, 0],
    template="seaborn",
)

# Reuse the shared layout but drop its Yes/No "Failure" colorbar settings, which only
# make sense for the binary flag. A shallow copy suffices because only a top-level key
# is replaced, so the shared dict stays untouched.
# NOTE(review): passing None presumably resets the colorbar to Plotly defaults, which
# would also clear the title set through `labels` — confirm that is the desired look.
actor_lr_layout = {**actor_critic_lr_layout, 'coloraxis_colorbar': None}

fig.update_layout(**actor_lr_layout)
fig.show()

if SAVE_FIGURE:
    fig.write_image(Path().paper_figures / f"idhp-{'sac' if IS_SAC else 'dsac'}-actor-hyperparams.pdf")
