In [4]:
# Standard-library modules used below to extend the import path.
import sys
import os

# abspath() of the *literal string* "__file__" is resolved against the current
# working directory, so `path` is the directory the kernel runs in
# (presumably a workaround for __file__ not being defined in a notebook).
path = os.path.dirname(os.path.abspath("__file__"))
# Put the parent of that directory first on sys.path; presumably this is what
# makes the project-local `util` package importable — TODO confirm layout.
sys.path.insert(0, path + '/../')

from util.io import log_files
import importlib
import matplotlib.pyplot as plt
import seaborn as sns
# Figure resolution: 150 dpi for displayed figures, 300 dpi for saved ones.
sns.set(rc={"figure.dpi":150, 'savefig.dpi':300})
# Use seaborn's 'paper' plotting context.
sns.set_context('paper')

import numpy as np
import pandas as pd

# Request the 'retina' format for inline matplotlib figures.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

# Used to parse the run configurations embedded as JSON strings further down.
import json

In [137]:
# Names of the run-configuration entries that are reported in the
# hyperparameter table; every other entry of `params` is ignored.
hyperparameter_keys = [
    # optimisation
    'batch_size',
    'activation',
    'epsilon_greedy',
    'optimizer',
    'learning_rate',
    # replay buffer / data collection
    'priority_exponent',
    'action_entropy_regularizer_scaling',
    'buckets_based_priority',
    'collect_steps_per_iteration',
    'importance_sampling_exponent_growth_rate',
    'importance_sampling_exponent',
    'prioritized_experience_replay',
    'epsilon_greedy_decay_rate',
    # latent space and relaxation temperatures
    'latent_size',
    'relaxed_state_encoder_temperature',
    'relaxed_state_prior_temperature',
    'encoder_temperature',
    'prior_temperature',
    'encoder_temperature_decay_rate',
    'prior_temperature_decay_rate',
    # regularisation and annealing schedules
    'entropy_regularizer_scale_factor',
    'entropy_regularizer_decay_rate',
    'kl_annealing_scale_factor',
    'kl_annealing_growth_rate',
    'start_annealing_step',
    'number_of_discrete_actions',
]

# Display labels for the hyperparameters that have a dedicated symbol or
# wording. Keys are configuration names with '_' already replaced by ' ';
# values are LaTeX snippets (using macros such as \latentstates that must be
# defined by the consuming document) or plain text.
_KEY_LABELS = {
    'latent size': r'$\left| \latentstates \right|$',
    'epsilon greedy': r'$\varepsilon$',
    'relaxed state encoder temperature': r'$\temperature_{1}^{\scriptscriptstyle \latentstates}$',
    'relaxed state prior temperature': r'$\temperature_{2}^{\scriptscriptstyle \latentstates}$',
    'encoder temperature': r'$\temperature_{1}^{\scriptscriptstyle \latentactions}$',
    'prior temperature': r'$\temperature_{2}^{\scriptscriptstyle \latentactions}$',
    'encoder temperature decay rate': r'$\tau_{\temperature_{1}}$',
    'prior temperature decay rate': r'$\tau_{\temperature_{2}}$',
    'entropy regularizer scale factor': r'$\alpha$',
    'entropy regularizer decay rate': r'$\tau_{\alpha}$',
    'kl annealing scale factor': r'$\beta$',
    'kl annealing growth rate': r'$\tau_{\beta}$',
    'number of discrete actions': r'$\left| \latentactions \right|$',
    'epsilon greedy decay rate': r'$\tau_{\varepsilon}$',
    'importance sampling exponent': r'$\omega$',
    'importance sampling exponent growth rate': r'$\tau_{\omega}$',
    'buckets based priority': 'bucket based',
    'action entropy regularizer scaling': r'$\alpha_2$',
}


def map_key(key):
    """Turn a configuration key into the label used in the table.

    Underscores are replaced by spaces; if the resulting name has an entry in
    ``_KEY_LABELS`` that label is returned, otherwise the spaced name itself.

    Args:
        key: configuration name, e.g. ``'latent_size'``.

    Returns:
        The display label as a string.
    """
    key = key.replace("_", " ")
    # A plain lookup with the spaced key as fallback; the whole key is either
    # relabelled or kept, never partially substituted.
    return _KEY_LABELS.get(key, key)

def map_value(value):
    """Return the display form of a hyperparameter value.

    Only the activation name ``'leaky_relu'`` is rewritten (to
    ``'leaky relu'``); any other value is handed back unchanged.
    """
    return 'leaky relu' if value == 'leaky_relu' else value

# Summary table filled in by the per-environment cells below; None marks that
# no environment column has been added yet (those cells test `df is None`).
df = None

## CartPole-v0

In [138]:
# Run configuration for the CartPole-v0 environment, embedded as a JSON
# string; json.loads turns it into a plain dict keyed by configuration name.
params = json.loads('{"logtostderr": false, "alsologtostderr": false, "log_dir": "", "v": 0, "verbosity": 0, "logger_levels": {}, "stderrthreshold": "fatal", "showprefixforinfo": true, "run_with_pdb": false, "pdb_post_mortem": false, "pdb": false, "run_with_profiling": false, "profile_file": null, "use_cprofile_for_profiling": true, "only_check_args": false, "op_conversion_fallback_to_while_loop": true, "runtime_oom_exit": true, "hbm_oom_exit": true, "test_random_seed": 301, "test_srcdir": "", "test_tmpdir": "/local/4071978-3.master01.hydra.brussel.vsc/absl_testing", "test_randomize_ordering_seed": "", "xml_output_file": "", "batch_size": 128, "mixture_components": 1, "action_mixture_components": 0, "full_covariance": false, "activation": "leaky_relu", "latent_size": 9, "max_state_decoder_variance": 0.0, "encoder_temperature": 0.99, "prior_temperature": 0.95, "relaxed_state_encoder_temperature": 0.67, "relaxed_state_prior_temperature": 0.5, "latent_policy": true, "encoder_temperature_decay_rate": 1e-06, "prior_temperature_decay_rate": 2e-06, "entropy_regularizer_scale_factor": 10.0, "entropy_regularizer_decay_rate": 1e-05, "entropy_regularizer_scale_factor_min_value": 0.0, "marginal_entropy_regularizer_ratio": 0.0, "kl_annealing_scale_factor": 0.0, "kl_annealing_growth_rate": 5e-05, "start_annealing_step": 10000, "max_steps": 1500000, "save_dir": "/theia/data/brussel/102/vsc10293/vae_mdp/", "logdir": "/theia/data/brussel/102/vsc10293/vae_mdp/log/2021-08-25", "display_progressbar": false, "action_discretizer": false, "one_output_per_action": false, "do_not_eval": false, "full_vae_optimization": true, "relaxed_state_encoding": true, "number_of_discrete_actions": 16, "load_vae": "", "encoder_layers": [256, 256], "decoder_layers": [256, 256], "transition_layers": [256, 256], "label_transition_layers": [256, 256], "reward_layers": [256, 256], "discrete_policy_layers": [256, 256], "policy_path": "/data/brussel/102/vsc10293/rl_policies/CartPole-v0/policy", "environment": 
"CartPole-v0", "env_suite": "suite_gym", "policy_environment": null, "parallel_env": 8, "annealing_period": 1, "aggressive_training": false, "initial_collect_steps": 10000, "seed": 22222222, "logs": true, "checkpoint": true, "epsilon_greedy": 0.0, "epsilon_greedy_decay_rate": 5e-06, "decompose_training": false, "prioritized_experience_replay": true, "priority_exponent": 0.33, "importance_sampling_exponent": 0.4, "importance_sampling_exponent_growth_rate": 7e-05, "buckets_based_priority": true, "collect_steps_per_iteration": 16, "hyperparameter_search": false, "hyperparameter_search_trials": 1, "prune_trials": false, "evaluation_window_size": 1, "wall_time": ".", "memory": -1.0, "time_stacked_states": 1, "state_encoder_pre_processing_network": false, "state_encoder_pre_processing_layers": [256, 256], "state_decoder_pre_processing_network": false, "state_decoder_pre_processing_layers": [256, 256], "optimizer": "Adam", "learning_rate": 0.001, "local_losses_evaluation": true, "local_losses_evaluation_steps": 34000, "local_losses_replay_buffer_size": 200000, "evaluation_interval": 10000, "label_transition_function": true, "action_entropy_regularizer_scaling": 1.0, "reward_upper_bound": null, "reward_lower_bound": null, "generate_videos": false, "?": false, "help": false, "helpshort": false, "helpfull": false, "helpxml": false}')

In [139]:
# Keep only the reported configuration entries, relabelling both the name
# (map_key) and the value (map_value) for display.
hyperparameters = {
    map_key(name): map_value(setting)
    for name, setting in params.items()
    if name in hyperparameter_keys
}

In [140]:
# Bare expression: shows the selected, relabelled hyperparameters as the cell output.
hyperparameters

{'batch size': 128,
 'activation': 'leaky relu',
 '$\\left| \\latentstates \\right|$': 9,
 '$\\temperature_{1}^{\\scriptscriptstyle \\latentactions}$': 0.99,
 '$\\temperature_{2}^{\\scriptscriptstyle \\latentactions}$': 0.95,
 '$\\temperature_{1}^{\\scriptscriptstyle \\latentstates}$': 0.67,
 '$\\temperature_{2}^{\\scriptscriptstyle \\latentstates}$': 0.5,
 '$\\tau_{\\temperature_{1}}$': 1e-06,
 '$\\tau_{\\temperature_{2}}$': 2e-06,
 '$\\alpha$': 10.0,
 '$\\tau_{\\alpha}$': 1e-05,
 '$\\beta$': 0.0,
 '$\\tau_{\\beta}$': 5e-05,
 'start annealing step': 10000,
 '$\\left| \\latentactions \\right|$': 16,
 '$\\varepsilon$': 0.0,
 '$\\tau_{\\varepsilon}$': 5e-06,
 'prioritized experience replay': True,
 'priority exponent': 0.33,
 '$\\omega$': 0.4,
 '$\\tau_{\\omega}$': 7e-05,
 'bucket based': True,
 'collect steps per iteration': 16,
 'optimizer': 'Adam',
 'learning rate': 0.001,
 '$\\alpha_2$': 1.0}

In [141]:
# All networks are expected to share one architecture (same number of layers,
# same width in every layer) so that a single 'layers' / 'neurons' pair can be
# reported; the assertions fail as soon as one network deviates.
layers = None
neurons = None
for network in ('encoder', 'decoder', 'transition', 'label_transition', 'reward', 'discrete_policy'):
    layer_sizes = params[network + '_layers']
    if layers is None and neurons is None:
        # The first network fixes the reference depth and width.
        layers = len(layer_sizes)
        neurons = layer_sizes[0]
    assert len(layer_sizes) == layers
    assert all(size == neurons for size in layer_sizes)

hyperparameters.update(layers=layers, neurons=neurons)

hyperparameters

{'batch size': 128,
 'activation': 'leaky relu',
 '$\\left| \\latentstates \\right|$': 9,
 '$\\temperature_{1}^{\\scriptscriptstyle \\latentactions}$': 0.99,
 '$\\temperature_{2}^{\\scriptscriptstyle \\latentactions}$': 0.95,
 '$\\temperature_{1}^{\\scriptscriptstyle \\latentstates}$': 0.67,
 '$\\temperature_{2}^{\\scriptscriptstyle \\latentstates}$': 0.5,
 '$\\tau_{\\temperature_{1}}$': 1e-06,
 '$\\tau_{\\temperature_{2}}$': 2e-06,
 '$\\alpha$': 10.0,
 '$\\tau_{\\alpha}$': 1e-05,
 '$\\beta$': 0.0,
 '$\\tau_{\\beta}$': 5e-05,
 'start annealing step': 10000,
 '$\\left| \\latentactions \\right|$': 16,
 '$\\varepsilon$': 0.0,
 '$\\tau_{\\varepsilon}$': 5e-06,
 'prioritized experience replay': True,
 'priority exponent': 0.33,
 '$\\omega$': 0.4,
 '$\\tau_{\\omega}$': 7e-05,
 'bucket based': True,
 'collect steps per iteration': 16,
 'optimizer': 'Adam',
 'learning rate': 0.001,
 '$\\alpha_2$': 1.0,
 'layers': 2,
 'neurons': 256}

In [142]:
env_name = 'CartPole'

# One column named after the environment, one row per hyperparameter.
env_column = pd.DataFrame(hyperparameters, index=[env_name]).transpose()

if df is None:
    # First environment: its column is the whole table.
    df = env_column
else:
    # Remove a column left behind by an earlier run of this cell before
    # concatenating; otherwise re-running the cell would yield two
    # 'CartPole' columns and break the per-column processing further down.
    df = pd.concat([df.drop(columns=env_name, errors='ignore'), env_column], axis=1)

df

Unnamed: 0,CartPole
batch size,128
activation,leaky relu
$\left| \latentstates \right|$,9
$\temperature_{1}^{\scriptscriptstyle \latentactions}$,0.99
$\temperature_{2}^{\scriptscriptstyle \latentactions}$,0.95
$\temperature_{1}^{\scriptscriptstyle \latentstates}$,0.67
$\temperature_{2}^{\scriptscriptstyle \latentstates}$,0.5
$\tau_{\temperature_{1}}$,0.000001
$\tau_{\temperature_{2}}$,0.000002
$\alpha$,10.0


## Acrobot-v1

In [143]:
# Run configuration for the Acrobot-v1 environment, embedded as a JSON
# string; json.loads turns it into a plain dict keyed by configuration name.
params = json.loads('{"logtostderr": false, "alsologtostderr": false, "log_dir": "", "v": 0, "verbosity": 0, "logger_levels": {}, "stderrthreshold": "fatal", "showprefixforinfo": true, "run_with_pdb": false, "pdb_post_mortem": false, "pdb": false, "run_with_profiling": false, "profile_file": null, "use_cprofile_for_profiling": true, "only_check_args": false, "op_conversion_fallback_to_while_loop": true, "runtime_oom_exit": true, "hbm_oom_exit": true, "test_random_seed": 301, "test_srcdir": "", "test_tmpdir": "/local/4071980-1.master01.hydra.brussel.vsc/absl_testing", "test_randomize_ordering_seed": "", "xml_output_file": "", "batch_size": 128, "mixture_components": 1, "action_mixture_components": 0, "full_covariance": false, "activation": "relu", "latent_size": 13, "max_state_decoder_variance": 0.0, "encoder_temperature": 0.99, "prior_temperature": 0.95, "relaxed_state_encoder_temperature": 0.667, "relaxed_state_prior_temperature": 0.5, "latent_policy": true, "encoder_temperature_decay_rate": 1e-06, "prior_temperature_decay_rate": 2e-06, "entropy_regularizer_scale_factor": 10.0, "entropy_regularizer_decay_rate": 7.5e-05, "entropy_regularizer_scale_factor_min_value": 0.0, "marginal_entropy_regularizer_ratio": 0.0, "kl_annealing_scale_factor": 0.0, "kl_annealing_growth_rate": 7.5e-05, "start_annealing_step": 10000, "max_steps": 1500000, "save_dir": "/theia/data/brussel/102/vsc10293/vae_mdp/", "logdir": "/theia/data/brussel/102/vsc10293/vae_mdp/log/2021-08-25", "display_progressbar": false, "action_discretizer": false, "one_output_per_action": false, "do_not_eval": false, "full_vae_optimization": true, "relaxed_state_encoding": true, "number_of_discrete_actions": 16, "load_vae": "", "encoder_layers": [256, 256], "decoder_layers": [256, 256], "transition_layers": [256, 256], "label_transition_layers": [256, 256], "reward_layers": [256, 256], "discrete_policy_layers": [256, 256], "policy_path": "reinforcement_learning/saves/AcrobotRandomInit-v1/dqn_policy", 
"environment": "Acrobot-v1", "env_suite": "suite_gym", "policy_environment": "Acrobot-v1", "parallel_env": 8, "annealing_period": 1, "aggressive_training": false, "initial_collect_steps": 10000, "seed": 11111, "logs": true, "checkpoint": true, "epsilon_greedy": 0.5, "epsilon_greedy_decay_rate": 1e-05, "decompose_training": false, "prioritized_experience_replay": true, "priority_exponent": 0.3, "importance_sampling_exponent": 0.4, "importance_sampling_exponent_growth_rate": 7e-05, "buckets_based_priority": true, "collect_steps_per_iteration": 16, "hyperparameter_search": false, "hyperparameter_search_trials": 1, "prune_trials": false, "evaluation_window_size": 1, "wall_time": ".", "memory": -1.0, "time_stacked_states": 1, "state_encoder_pre_processing_network": false, "state_encoder_pre_processing_layers": [256, 256], "state_decoder_pre_processing_network": false, "state_decoder_pre_processing_layers": [256, 256], "optimizer": "Adam", "learning_rate": 0.0001, "local_losses_evaluation": true, "local_losses_evaluation_steps": 34000, "local_losses_replay_buffer_size": 200000, "evaluation_interval": 10000, "label_transition_function": true, "action_entropy_regularizer_scaling": 1.0, "reward_upper_bound": null, "reward_lower_bound": null, "generate_videos": false, "?": false, "help": false, "helpshort": false, "helpfull": false, "helpxml": false}')
# Keep only the reported configuration entries and relabel them for display.
# Values go through map_value as well (as in the CartPole cell): the table is
# later exported with escape=False, so a raw value such as 'leaky_relu' would
# put an unescaped underscore into the LaTeX output.
hyperparameters = dict((map_key(key), map_value(value)) for key, value in params.items() if key in hyperparameter_keys)

# All networks are expected to share one architecture (same number of layers,
# same width in every layer) so that a single 'layers' / 'neurons' pair can be
# reported; the assertions fail as soon as one network deviates.
layers = None
neurons = None
for i in ['encoder', 'decoder', 'transition', 'label_transition', 'reward', 'discrete_policy']:
    if layers is None and neurons is None:
        # The first network fixes the reference depth and width.
        layers = len(params[i + '_layers'])
        neurons = params[i + '_layers'][0]
    assert layers == len(params[i + '_layers'])
    for layer in params[i + '_layers']:
        assert layer == neurons

hyperparameters['layers'] = layers
hyperparameters['neurons'] = neurons

env_name = 'Acrobot'

# One column named after the environment, one row per hyperparameter.
env_column = pd.DataFrame(hyperparameters, index=[env_name]).transpose()

if df is None:
    # First environment: its column is the whole table.
    df = env_column
else:
    # Remove a column left behind by an earlier run of this cell before
    # concatenating; otherwise re-running the cell would yield two
    # 'Acrobot' columns and break the per-column processing further down.
    df = pd.concat([df.drop(columns=env_name, errors='ignore'), env_column], axis=1)

df

Unnamed: 0,CartPole,Acrobot
batch size,128,128
activation,leaky relu,relu
$\left| \latentstates \right|$,9,13
$\temperature_{1}^{\scriptscriptstyle \latentactions}$,0.99,0.99
$\temperature_{2}^{\scriptscriptstyle \latentactions}$,0.95,0.95
$\temperature_{1}^{\scriptscriptstyle \latentstates}$,0.67,0.667
$\temperature_{2}^{\scriptscriptstyle \latentstates}$,0.5,0.5
$\tau_{\temperature_{1}}$,0.000001,0.000001
$\tau_{\temperature_{2}}$,0.000002,0.000002
$\alpha$,10.0,10.0


## Export to LaTeX

In [144]:
# Collapse the two boolean replay flags into one descriptive label per
# environment ('buckets', 'loss' or 'uniform'), stored in the
# 'prioritized experience replay' row, then drop the now-redundant
# 'bucket based' row.
#
# The guard makes the cell safe to re-run: once the 'bucket based' row is gone
# the labels have already been written, and repeating the step would both
# misread the labels as flags and fail on the drop.
if 'bucket based' in df.index:
    for environment in df.columns:
        # Scalar .loc lookups: a missing row raises KeyError instead of being
        # treated as "True" by .all() on an empty selection.
        if df.loc['prioritized experience replay', environment]:
            if df.loc['bucket based', environment]:
                replay_label = 'buckets'
            else:
                replay_label = 'loss'
        else:
            replay_label = 'uniform'
        df.loc['prioritized experience replay', environment] = replay_label
    df = df.drop(index='bucket based')

In [148]:
# Desired top-to-bottom order of the table rows, given as the row labels
# produced by map_key (plus the derived 'layers' / 'neurons' rows).
sort_table = [
    'batch size',
    'activation',
    'layers',
    'neurons',
    'optimizer',
    'learning rate',
    'collect steps per iteration',
    r'$\left| \latentstates \right|$',
    r'$\left| \latentactions \right|$',
    r'$\temperature_{1}^{\scriptscriptstyle \latentstates}$',
    r'$\temperature_{2}^{\scriptscriptstyle \latentstates}$',
    r'$\temperature_{1}^{\scriptscriptstyle \latentactions}$',
    r'$\temperature_{2}^{\scriptscriptstyle \latentactions}$',
    r'$\tau_{\temperature_{1}}$',
    r'$\tau_{\temperature_{2}}$',
    r'$\alpha$',
    r'$\alpha_2$',
    r'$\tau_{\alpha}$',
    r'$\beta$',
    r'$\tau_{\beta}$',
    'prioritized experience replay',
    'priority exponent',
    r'$\omega$',
    r'$\tau_{\omega}$',
]


def sort(key):
    """Return the sort rank of a row label.

    The rank is the label's position in ``sort_table``; labels that are not
    listed all get ``len(sort_table)`` and therefore sort after the listed ones.
    """
    try:
        return sort_table.index(key)
    except ValueError:
        return len(sort_table)

# Reorder the rows by the rank that sort() assigns to each row label.
df = df.sort_index(key=lambda labels: [sort(label) for label in labels])

In [150]:
# Remove pandas' display limits on the number of columns and on cell width.
pd.set_option('display.max_columns', None)
# None is the supported way to say "no limit"; the former -1 is deprecated
# since pandas 1.0 and rejected by newer releases.
pd.set_option('display.max_colwidth', None)
# escape=False leaves the LaTeX markup of the row labels untouched; floats are
# wrapped in math mode and shortened to two significant digits.
print(df.to_latex(float_format="${:.2g}$".format, escape=False))

\begin{tabular}{lll}
\toprule
{} &    CartPole &   Acrobot \\
\midrule
batch size                                             &  128 &  128 \\
activation                                             &  leaky relu &  relu \\
layers                                                 &  2 &  2 \\
neurons                                                &  256 &  256 \\
optimizer                                              &  Adam &  Adam \\
learning rate                                          & $0.001$ & $0.0001$ \\
collect steps per iteration                            &  16 &  16 \\
$\left| \latentstates \right|$                         &  9 &  13 \\
$\left| \latentactions \right|$                        &  16 &  16 \\
$\temperature_{1}^{\scriptscriptstyle \latentstates}$  & $0.67$ & $0.67$ \\
$\temperature_{2}^{\scriptscriptstyle \latentstates}$  & $0.5$ & $0.5$ \\
$\temperature_{1}^{\scriptscriptstyle \latentactions}$ & $0.99$ & $0.99$ \\
$\temperature_{2}^{\scriptscriptstyle \latentacti

  
