## RLDynamicHedger - Reads the optimal hyper-parameters of the trained/tuned RL agents

#### Imports

In [1]:
import os, sys
import numpy as np
import pandas as pd

# Seed handed to NumPy's global random generator below
SEED = 100
# Line break appended to console messages
NEW_LINE = "\n"

# Seed NumPy's legacy global RNG once, up front
np.random.seed(seed=SEED)

#### Set the current working directory

In [2]:
# Project root, expressed relative to the directory the notebook kernel starts in
ROOT_PATH = "../"

# Remember the kernel's start directory the first time this cell runs. Without
# this anchor, every re-run of the cell would climb one more directory
# ("../" relative to the *current* directory) and break the relative paths
# used further down.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()

os.chdir(os.path.normpath(os.path.join(_NOTEBOOK_START_DIR, ROOT_PATH)))
print(f"Current path is: {os.getcwd()}...{NEW_LINE}")

Current path is: C:\Development\Training\MLI Certificate of Finance\Final-Project\Project\RLDynamicHedgerV2...



#### Libraries for RLDynamicHedger

In [3]:
from src.main.utility.enum_types import HedgingType, RLAgorithmType

#### RL agent settings

In [4]:
# Folder holding the tuning output; "{0}" is filled with the RL algorithm name
REWARD_CURVE_DATA_FOLDER_PATH = "model/trained-tuned-models/{0}/tuned-base-model-hyper-parameters"
# File-name pattern of the tuning results; "{0}" is filled with the RL algorithm name
REWAWRD_CURVE_DATA_FILE_NAME_TEMPLATE = "{0}_tuning_results.csv"
# Tuning-results CSV path for each RL algorithm type (td3, ddpg, sac, ppo — in that order)
REWARD_CURVE_DATA_PATHS = {
    agent_type: "/".join(
        (REWARD_CURVE_DATA_FOLDER_PATH, REWAWRD_CURVE_DATA_FILE_NAME_TEMPLATE)
    ).format(agent_type.name)
    for agent_type in (
        RLAgorithmType.td3,
        RLAgorithmType.ddpg,
        RLAgorithmType.sac,
        RLAgorithmType.ppo,
    )
}
# Columns removed from the tuning results before they are displayed
REWARD_CURVE_REMOVE_COLUMNS = ["datetime_start", "datetime_complete", "number", "duration"]


#### Read the tuned hyper-parameter data per RL agent 

In [11]:
def readHyperparameterData(best_only=False):
    """
    Reads and displays the tuned hyper-parameters of the RL agents.

    For every entry of REWARD_CURVE_DATA_PATHS a header line is printed, the
    tuning-results CSV is loaded, the columns listed in
    REWARD_CURVE_REMOVE_COLUMNS are removed and the trials whose ``state`` is
    "COMPLETE" are displayed without row/column truncation. When the CSV file
    does not exist, a message naming the missing path is printed instead.

    :param best_only: When True, only the completed trial(s) with the highest
        ``value`` are displayed. Defaults to False (all completed trials).
        NOTE(review): assumes a larger ``value`` is better - confirm against
        the tuning objective.
    :return: None
    """
    for agent_type, csv_path in REWARD_CURVE_DATA_PATHS.items():
        print(f"Hyper-parameter data for {agent_type.name} RL agent is")

        if not os.path.exists(csv_path):
            # Say so explicitly instead of leaving an empty section under the header
            print(f"No tuning results found at: {csv_path}")
            continue

        # errors="ignore": the removed columns are display clutter only, so a
        # results file lacking some of them must not abort the whole report.
        trials_df = pd.read_csv(csv_path, index_col=False).drop(
            columns=REWARD_CURVE_REMOVE_COLUMNS, errors="ignore"
        )
        completed_df = trials_df[trials_df["state"] == "COMPLETE"]

        if best_only:
            # Take the maximum over completed trials only, so the value of an
            # unfinished trial can never hide the real best trial.
            best_value = completed_df["value"].max()
            completed_df = completed_df[completed_df["value"] == best_value]

        with pd.option_context("display.max_rows", None, "display.max_columns", None):
            display(completed_df)

# Run the report: prints a header per configured RL agent and displays its tuning trials
readHyperparameterData()

Hyper-parameter data for td3 RL agent is


Unnamed: 0.1,Unnamed: 0,value,params_activation_fn,params_batch_size,params_buffer_size,params_gamma,params_gradient_steps,params_learning_rate,params_net_arch,params_noise_std,params_noise_type,params_tau,params_train_freq,state
0,0,-0.904502,relu,64,10000,0.95,1,0.274329,small,0.658213,,0.05,64,COMPLETE
1,1,-0.571939,elu,512,1000000,0.995,128,0.000157,small,0.739094,normal,0.001,64,COMPLETE
2,2,-1.515264,leaky_relu,2048,10000,0.98,256,1.2e-05,small,0.309841,normal,0.01,128,COMPLETE
3,3,-1.305868,relu,32,1000000,0.99,32,0.012344,medium,0.954587,normal,0.05,32,COMPLETE
4,4,-0.702875,elu,16,10000,0.99,1,0.000244,medium,0.457478,,0.005,128,COMPLETE
6,6,-1.004835,tanh,100,100000,0.995,4,1.3e-05,small,0.021083,normal,0.02,256,COMPLETE


Hyper-parameter data for ddpg RL agent is


Unnamed: 0.1,Unnamed: 0,value,params_activation_fn,params_batch_size,params_buffer_size,params_gamma,params_learning_rate,params_net_arch,params_noise_std,params_noise_type,params_tau,params_train_freq,state
0,0,-2.016063,leaky_relu,32,1000000,0.9999,0.000228,small,0.536949,normal,0.05,32,COMPLETE
1,1,-1.316609,elu,64,100000,0.99,0.000613,medium,0.216162,ornstein-uhlenbeck,0.08,512,COMPLETE
2,2,-7.730636,relu,1024,100000,0.98,0.000196,medium,0.123686,normal,0.02,128,COMPLETE
3,3,-8.815033,tanh,64,10000,0.9999,0.697232,big,0.621102,normal,0.08,4,COMPLETE
4,4,-15.093211,tanh,16,10000,0.98,0.001236,big,0.518735,,0.02,64,COMPLETE
5,5,-2.311831,elu,64,100000,0.99,0.041111,medium,0.086672,ornstein-uhlenbeck,0.005,512,COMPLETE
7,7,-14.477071,elu,256,1000000,0.9,0.017146,small,0.299355,ornstein-uhlenbeck,0.001,512,COMPLETE


Hyper-parameter data for sac RL agent is


Unnamed: 0.1,Unnamed: 0,value,params_activation_fn,params_batch_size,params_buffer_size,params_gamma,params_learning_rate,params_learning_starts,params_log_std_init,params_net_arch,params_tau,params_train_freq,state
0,0,-0.869821,relu,2048,1000000,0.95,0.000874,10000,-2.473397,big,0.01,32,COMPLETE
1,1,-0.075255,tanh,32,1000000,0.9999,0.316197,10000,-0.114221,small,0.05,32,COMPLETE
2,2,-1.749782,relu,128,1000000,0.999,0.002275,20000,-2.126581,big,0.05,128,COMPLETE
3,3,-0.417769,tanh,32,100000,0.98,2.6e-05,20000,-2.095959,medium,0.01,128,COMPLETE
4,4,-0.467069,relu,64,1000000,0.9,0.005257,10000,-2.806197,big,0.001,128,COMPLETE
6,6,0.040406,elu,1024,100000,0.9999,0.713286,10000,0.153719,small,0.08,256,COMPLETE
8,8,-0.807993,elu,1024,100000,0.995,0.054782,10000,-3.736572,medium,0.02,256,COMPLETE


Hyper-parameter data for ppo RL agent is


Unnamed: 0.1,Unnamed: 0,value,params_activation_fn,params_batch_size,params_clip_range,params_ent_coef,params_gae_lambda,params_gamma,params_learning_rate,params_log_std_init,params_lr_schedule,params_max_grad_norm,params_n_epochs,params_n_steps,params_net_arch,params_ortho_init,params_sde_sample_freq,params_vf_coef,state
0,0,-2.557539,tanh,32,0.7,0.0006038639,0.99,0.995,3e-05,-3.676704,constant,0.7,1,512,small,True,-1,0.612143,COMPLETE
1,1,-3.726087,relu,256,0.5,0.06311602,0.8,0.999,0.227543,-2.158419,linear,0.8,1,64,tiny,True,32,0.402036,COMPLETE
2,2,-1.955981,relu,64,0.5,1.211868e-08,1.0,0.99,0.000878,-1.707272,constant,5.0,10,1024,medium,True,32,0.619256,COMPLETE
3,3,-1.877143,elu,32,0.8,0.01283054,0.95,0.9999,0.053756,-1.129787,constant,0.9,1,256,small,True,8,0.147836,COMPLETE
4,4,-0.860578,elu,512,0.9,1.281299e-06,0.99,0.99,0.00414,0.673874,linear,1.0,1,512,tiny,False,256,0.259742,COMPLETE
7,7,-1.841926,elu,16,0.9,2.103796e-07,0.92,0.9,0.016258,-0.467225,linear,0.5,20,8,medium,False,16,0.291919,COMPLETE
