# Incremental Dual Heuristic Programming (IDHP) Hyperparameters Analysis

The focus will primarily be on three fundamental hyperparameters:

- Learning Rates: The learning rate controls how large each update to the model is. A low learning rate may require more iterations to converge, whereas a high learning rate may overshoot the optimal solution or make learning unstable.
- Discount Factor (Gamma): The discount factor determines how much future rewards are taken into consideration for decision-making in the present step. A low discount factor will make the agent "short-sighted," favoring immediate rewards, while a high discount factor places more value on long-term rewards.
- Tracking Tasks: We will define different tasks for our RL agent to follow, which will alter the training environment and therefore the learned policy. This will allow us to examine how our algorithm performs under different conditions and objectives.


## Setup
Import the necessary packages and set default styles

In [1]:
from visualization.utils import make_defaults
from visualization.utils import defaults as d
from helpers.paths import Path
import pandas as pd
import plotly.express as px

# Apply the project's default plot styles (helper from visualization.utils)
# before any figure is created.
make_defaults()

## Load and Prepare Data

In [21]:
# Read the four hyperparameter-sweep exports from the project data directory.
# Order of the file names matches the order of the target names on the left:
# SAC / DSAC learning-rate sweeps first, then SAC / DSAC discount-factor sweeps.
df_sac_lr, df_dsac_lr, df_sac_discount, df_dsac_discount = (
    pd.read_csv(Path().data / csv_name)
    for csv_name in (
        'idhp-sac-hyperparams_z3xgpv6z.csv',
        'idhp-sac-hyperparams_050zos9u.csv',
        'idhp-sac-hyperparams_o1ff7591.csv',
        'idhp-sac-hyperparams_ru9v8t1q.csv',
    )
)

# Convert numeric columns to a numeric type and assign a run status

In [22]:
# Columns that are coerced to a numeric dtype before thresholding/plotting.
numeric_cols = ['lr_a_high', 'lr_c_high', 'nmae_improvement', 'sac_nmae', 'idhp_nmae']
for _df in [df_sac_lr, df_dsac_lr, df_sac_discount, df_dsac_discount]:
    # Fill missing improvements in the 'nmae_improvement' column ONLY.
    # A plain `_df[mask] = value` assigns row-wise, i.e. it would overwrite
    # every column of those rows (including the hyperparameter columns that
    # are plotted later) with the sentinel.
    # NOTE(review): the sentinel -1e-9 is >= -30, so runs with a missing
    # improvement end up with status 1 below — confirm this is intended
    # (and not a typo for a large negative value such as -1e9).
    _df.loc[_df['nmae_improvement'].isna(), 'nmae_improvement'] = -1e-9

    # pd.to_numeric is applied column by column; with its default settings
    # an unparsable value raises instead of being silently dropped.
    _df[numeric_cols] = _df[numeric_cols].apply(pd.to_numeric)

    # Status code derived from the nMAE improvement [%]; the three bands are
    # disjoint and together cover every non-NaN value:
    #   -1 : improvement < -1e6          (labelled "Failed" in the plots)
    #    0 : -1e6 <= improvement < -30   (labelled "Unacceptable")
    #    1 : improvement >= -30          (shown in the "success" plots)
    _df.loc[_df['nmae_improvement']  < -1e6, "status"] = -1
    _df.loc[(_df['nmae_improvement'] >= -1e6) & (_df['nmae_improvement'] < -30), "status"] = 0
    _df.loc[(_df['nmae_improvement'] >= -30), "status"] = 1

    # The column is created by partial .loc assignment (float while it still
    # has gaps), so cast to int once every row has been labelled.
    _df['status'] = _df['status'].astype(int)

# Plot experiments varying the learning rate

In [23]:
# Layout keyword arguments passed to `fig.update_layout(...)` by the
# parallel-coordinates figures in this notebook.
actor_critic_lr_layout = {
    "autosize": False,   # fixed figure size instead of auto-sizing
    "width": d.width,    # width in pixels (project default)
    "height": 300,       # height in pixels
    "font": d.font,
    # Colour bar for the status-coloured plots: one labelled tick per code.
    "coloraxis_colorbar": {
        "title": "",
        "tickvals": [-1, 0],
        "ticktext": ["-1: Failed", "0: Unacceptable"],
        "lenmode": "pixels",
        "len": 100,
        "tickfont_size": 15,
    },
    # Tight margins to reduce the white space around the plot.
    "margin": {
        "l": 40,  # left
        "r": 2,   # right
        "b": 10,  # bottom
        "t": 45,  # top
    },
}

## Changing actor and critic learning rate at the same time

In [17]:
# Learning-rate sweep, runs with status == 1: parallel-coordinates plot
# coloured by the nMAE improvement.
IS_SAC = True  # True -> use the SAC sweep, False -> use the DSAC sweep

df = df_sac_lr if IS_SAC else df_dsac_lr
df = df.loc[df['status'] == 1]

# Column -> axis label; the key order is also the axis order in the figure.
axis_labels = {
    'lr_a_high': "η Actor [-]",
    'lr_c_high': "η Critic [-]",
    'nmae_improvement': 'nMAE Improvement [%]',
}

fig = px.parallel_coordinates(
    df[list(axis_labels)],
    color='nmae_improvement',
    labels=axis_labels,
    color_continuous_scale=px.colors.sequential.Bluered[::-1],
    color_continuous_midpoint=0.5,
    range_color=[-30, 30],
    template="seaborn",
)

# Shallow copy of the shared layout with the colour-bar settings set to None
# (the status tick labels do not apply to a continuous improvement scale).
layout_nmae_improvement = {**actor_critic_lr_layout, 'coloraxis_colorbar': None}
fig.update_layout(**layout_nmae_improvement)

fig.show()

# Export switch: set to True to write the figure as a PDF.
save_figure = False
if save_figure:
    fig.write_image(Path().paper_figures / f"hyperparams-idhp-{'sac' if IS_SAC else 'dsac'}-lr-success.pdf")

In [15]:
# Learning-rate sweep, runs with status != 1: parallel-coordinates plot
# coloured by the status code.
IS_SAC = True  # True -> use the SAC sweep, False -> use the DSAC sweep

df = df_sac_lr if IS_SAC else df_dsac_lr
df = df.loc[df['status'] != 1]

# Column -> axis label; the key order is also the axis order in the figure.
axis_labels = {
    'lr_a_high': "η Actor [-]",
    'lr_c_high': "η Critic [-]",
    'status': 'Status [-]',
}

fig = px.parallel_coordinates(
    df[list(axis_labels)],
    color='status',
    labels=axis_labels,
    color_continuous_scale=px.colors.sequential.Bluered[::-1],
    color_continuous_midpoint=0.5,
    range_color=[-1, 0],  # the two status codes that remain after filtering
    template="seaborn",
)

# Shared layout, including the labelled status colour bar.
fig.update_layout(**actor_critic_lr_layout)

fig.show()

# Export switch: set to True to write the figure as a PDF.
save_figure = False
if save_figure:
    fig.write_image(Path().paper_figures / f"hyperparams-idhp-{'sac' if IS_SAC else 'dsac'}-lr-failure.pdf")

# Changing the discount factor

In [32]:
# Discount-factor sweep, runs with status == 1: parallel-coordinates plot
# coloured by the nMAE improvement.
IS_SAC = False  # True -> use the SAC sweep, False -> use the DSAC sweep

df = df_sac_discount if IS_SAC else df_dsac_discount
df = df.loc[df['status'] == 1]

# Column -> axis label; the key order is also the axis order in the figure.
axis_labels = {
    'discount_factor': "γ [-]",
    'discount_factor_model': "γRLS [-]",
    'nmae_improvement': 'nMAE Improvement [%]',
}

fig = px.parallel_coordinates(
    df[list(axis_labels)],
    color='nmae_improvement',
    labels=axis_labels,
    color_continuous_scale=px.colors.sequential.Bluered[::-1],
    color_continuous_midpoint=0.5,
    range_color=[-30, 30],
    template="seaborn",
)

# Shallow copy of the shared layout with the colour-bar settings set to None
# (the status tick labels do not apply to a continuous improvement scale).
layout_nmae_improvement = {**actor_critic_lr_layout, 'coloraxis_colorbar': None}
fig.update_layout(**layout_nmae_improvement)

fig.show()

# Export switch: currently enabled, so running this cell writes the PDF.
save_figure = True
if save_figure:
    fig.write_image(Path().paper_figures / f"hyperparams-idhp-{'sac' if IS_SAC else 'dsac'}-discount-success.pdf")

In [34]:
# Discount-factor sweep, runs with status != 1: parallel-coordinates plot
# coloured by the status code.
IS_SAC = False  # True -> use the SAC sweep, False -> use the DSAC sweep

df = df_sac_discount if IS_SAC else df_dsac_discount
df = df.loc[df['status'] != 1]

# Column -> axis label; the key order is also the axis order in the figure.
axis_labels = {
    'discount_factor': "γ [-]",
    'discount_factor_model': "γRLS [-]",
    'status': 'Status [-]',
}

fig = px.parallel_coordinates(
    df[list(axis_labels)],
    color='status',
    labels=axis_labels,
    color_continuous_scale=px.colors.sequential.Bluered[::-1],
    color_continuous_midpoint=0.5,
    range_color=[-1, 0],  # the two status codes that remain after filtering
    template="seaborn",
)

# Shared layout, including the labelled status colour bar.
fig.update_layout(**actor_critic_lr_layout)

fig.show()

# Export switch: currently enabled, so running this cell writes the PDF.
save_figure = True
if save_figure:
    fig.write_image(Path().paper_figures / f"hyperparams-idhp-{'sac' if IS_SAC else 'dsac'}-discount-failure.pdf")