In [1]:
from Environment import Car, CarEnv
from ModelsTorch import Actor, Critic
from AgentTorch import PPOAgent
import pygame
from utils import compute_borders, scale_image
import wandb
import pprint
import itertools
import random
import torch
import numpy as np  

pygame 2.1.3 (SDL 2.0.22, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Seed every pseudo-random number generator the notebook relies on and ask
# torch for deterministic kernels, so a fresh run starts from the same state.
seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)

# Track geometry: compute_borders receives the circuit image and the finish
# template and returns the circuit edges, the finish edges and the finish position.
circuit_path = 'images\\circuits\\level3.png'
finish_template_path = "images\\finish_template.png"
circuit_edges, finish_edges, finish_position = compute_borders(circuit_path, finish_template_path)

# Sprites. `images` pairs each background surface with the position it is
# associated with (track at the origin, finish line at its detected position).
track_img = pygame.image.load(circuit_path)
finish_img = pygame.image.load("images\\finish_image.png")
car_sprite = pygame.image.load("images\\red-car.png")
car_img = scale_image(car_sprite, 0.35)
images = [
    (track_img, (0, 0)),
    (finish_img, finish_position),
]

# Car with 9 radars and an environment exposing 6 actions; these figures have to
# agree with the "input_size" / "output_size" entries of the sweep configuration.
car = Car(car_img, acceleration=0.2, num_radars=9)
env = CarEnv(car, circuit_edges, finish_edges, num_actions=6)

# The experiment number is typed in by the user and is used to name the
# per-experiment save folder and the TensorBoard log directory.
experiment = int(input('Número del experimento'))
saves = f'saves_tfm\\{experiment!s}'
wandb.tensorboard.patch(
    root_logdir=f'runs\\ppo_experiment_tfm\\{experiment!s}',
    tensorboard_x=False,
    save=False,
)

def hp_opt(config=None):
    """Train one PPO agent inside a wandb run; entry point for the sweep agent.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. When the
            function is launched through ``wandb.agent`` the sweep controller
            supplies the values, which are read back from ``wandb.config``.

    The function uses the notebook-level ``env``, ``images`` and ``experiment``
    variables, and returns nothing (the result of ``agent.train`` is discarded).
    """
    # Reset the state of every pseudo-random number generator at the start of
    # each trial so that all trials are reproducible.
    seed = 42
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)

    # Open a new wandb run; it is closed when the `with` block exits.
    with wandb.init(config=config):
        # Under wandb.agent this holds the hyperparameters chosen by the sweep controller.
        trial_config = wandb.config

        # Both networks receive the complete trial configuration as keyword arguments.
        policy_net = Actor(**trial_config)
        value_net = Critic(**trial_config)

        for network in (policy_net, value_net):
            wandb.watch(network, log_freq=10)

        log_dir = f'runs\\ppo_experiment_tfm\\{experiment!s}'
        agent = PPOAgent(policy_net, value_net, log_dir=log_dir, **trial_config)

        # The same environment is passed twice (NOTE(review): presumably as the
        # training and the validation environment -- confirm against PPOAgent.train).
        _, _ = agent.train(
            env,
            env,
            images,
            save_path=None,
            updates_per_flush=20,
            val_fps=None,
            val_plot=False,
            val_verbose=False,
        )

In [3]:
def get_sweep_config(sweep_decay=None):
    """Build the wandb sweep configuration for the PPO hyperparameter search.

    Args:
        sweep_decay: Learning-rate decay schedule to include in the search
            space. One of ``None`` (no decay), ``'plateau'``,
            ``'exponential'`` or ``'linear'``.

    Returns:
        dict: A sweep configuration with ``name``, ``method`` (random search),
        ``metric`` (maximize ``Reward/Mean_val_reward``) and ``parameters``.
        The decay-related parameters are appended after the common ones.

    Raises:
        ValueError: If ``sweep_decay`` is not one of the supported values.
            A misspelled schedule name used to fall through silently to the
            "no decay" configuration.
    """
    # Search-space fragment contributed by each supported decay schedule.
    decay_spaces = {
        None: {
            "decay_method": {
                "value": None
            },
        },
        "plateau": {
            "decay_method": {
                "value": "plateau"
            },
            "plateau_factor": {
                "distribution": "uniform",
                "min": 0.01,
                "max": 0.9
            },
            "plateau_patience": {
                "values": [5, 10]
            }
        },
        "exponential": {
            "decay_method": {
                "value": "exponential"
            },
            "exponential_factor": {
                "distribution": "log_uniform_values",
                "min": 0.85,
                "max": 0.999
            }
        },
        "linear": {
            "decay_method": {
                "value": "linear"
            },
            "linear_end_factor": {
                "distribution": "log_uniform_values",
                "min": 0.85,
                "max": 0.999
            }
        },
    }

    # Fail fast on an unknown schedule instead of silently disabling decay.
    if sweep_decay not in decay_spaces:
        valid = ", ".join(repr(option) for option in decay_spaces)
        raise ValueError(
            f"Unknown sweep_decay {sweep_decay!r}; expected one of: {valid}"
        )

    number_hidden_layers = [2, 3, 4]  # Possible number of hidden layers
    possible_hidden_sizes = [150, 250, 350]  # Possible neurons of hidden layers
    # Every possible combination of layer widths, for each allowed depth.
    combinations = [
        list(layer_sizes)
        for depth in number_hidden_layers
        for layer_sizes in itertools.product(possible_hidden_sizes, repeat=depth)
    ]

    sweep_config = {
        "name": "Sweep_TFM",
        "method": "random",
        "metric": {
            "goal": "maximize",
            "name": "Reward/Mean_val_reward"
        },
        "parameters": {
            # Models
            "input_size": {
                "value": 10  # Must be even (odd number of radars + current velocity)
            },
            "hidden_sizes": {
                "values": combinations
            },
            "output_size": {
                "value": 6  # Number of actions
            },
            "dropout_prob": {
                "values": [0, 0.1, 0.2, 0.3]
            },
            "activation": {
                "values": ["tanh", "lrelu"]  # Activation layers
            },
            "lrelu": {
                "values": [0.001, 0.01, 0.1]  # If activation=LeakyReLU, alpha parameter
            },
            "momentum": {
                "values": [0.8, 0.9, 0.95, 0.99]  # Momentum of Batch Normalization layers (if there are)
            },
            "bn": {
                "values": [True, False]  # If there are Batch Normalization layers
            },
            "initialization": {
                "values": ["orthogonal", "normal", "uniform"]  # Initialization method (layer weights)
            },

            # Agent + training
            'actor_lr': {
                'distribution': 'log_uniform_values',
                'min': 1e-4,
                'max': 1e-2
            },
            'critic_lr': {
                'distribution': 'log_uniform_values',
                'min': 1e-4,
                'max': 1e-2
            },
            "value_loss_factor": {
                'value': 1
            },
            "entropy": {
                "distribution": "uniform",
                "min": 1e-4,
                "max": 5e-2
            },
            "gamma":  {
                "values": [0.9, 0.95, 0.99]
            },
            "GAE_lambda": {
                "value": 0.95
            },
            "clipping_epsilon": {
                "value": 0.2
            },
            "l1_factor": {
                "distribution": "log_uniform_values",
                "min": 1e-6,
                "max": 1e-3
            },
            "l2_factor": {
                "distribution": "log_uniform_values",
                "min": 1e-6,
                "max": 1e-3
            },
            "T": {
                "values": [256, 512, 768, 1024]
            },
            'minibatch_size': {
                "values": [32, 64, 128, 256]
            },
            "epochs": {
                "value": 10 #[5, 10, 15, 20]
            },
            "updates": {
                "value": 200
            },
            "val_episodes": {
                "value": 10
            },
            "updates_per_val": {
                "value": 1
            },
            "target_kl": {
                "values": [0.01, 0.02, 0.03]
            },
            "adv_std": {
                "values": [True, False]
            },
            "early_stopping_patience": {
                "value": 30  # If it is too low, it may interfere with plateau reduction.
            },
            "early_stopping_delta": {
                "value": 0.00
            },
        },
    }

    # Append the schedule-specific parameters after the common ones.
    sweep_config["parameters"].update(decay_spaces[sweep_decay])

    return sweep_config

# Build the search space with an exponential learning-rate decay schedule.
sweep_config = get_sweep_config('exponential')

# Register the sweep in the wandb project and keep the returned identifier
# so the agent cell can attach to it.
sweep_id = wandb.sweep(project="TFM_project", sweep=sweep_config)

# Show the full configuration, then the sweep id as the cell result.
pprint.pprint(sweep_config)
sweep_id

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 62savqn0
Sweep URL: https://wandb.ai/antoniosg00/TFM_project/sweeps/62savqn0
{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Reward/Mean_val_reward'},
 'name': 'Sweep_TFM',
 'parameters': {'GAE_lambda': {'value': 0.95},
                'T': {'values': [256, 512, 768, 1024]},
                'activation': {'values': ['tanh', 'lrelu']},
                'actor_lr': {'distribution': 'log_uniform_values',
                             'max': 0.01,
                             'min': 0.0001},
                'adv_std': {'values': [True, False]},
                'bn': {'values': [True, False]},
                'clipping_epsilon': {'value': 0.2},
                'critic_lr': {'distribution': 'log_uniform_values',
                              'max': 0.01,
                              'min': 0.0001},
                'decay_method': {'value': 'exponential'},
                'dropout_prob': {'values': [0, 0.1, 0.2, 0.3]},
                'early_stopping_delta':

'62savqn0'

In [4]:
# Launch the sweep agent: it calls hp_opt once per trial, for up to 100 trials.
wandb.agent(
    sweep_id,
    function=hp_opt,
    project="TFM_project",
    count=100,
)
# Close any wandb run that is still open once the agent has finished.
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: joopscwv with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.004081535403239059
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0002147731490633399
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0066479705680139445
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9718100390147708
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 150, 250, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00034222243641892795
[34m[1mwandb[0m: 	l2_factor: 2.9459499540515144e-05
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.004081535403239059, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0002147731490633399, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0066479705680139445, 'epochs': 10, 'exponential_factor': 0.9718100390147708, 'gamma': 0.99, 'hidden_sizes': [150, 150, 250, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.00034222243641892795, 'l2_factor': 2.9459499540515144e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 150, 250, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▃▂▂▂▂▂▅▃▂▃▃▄▄▃▄▄▃▅▅▆▃▄▄▄▄▄▄▄▄▅█▅▅▄▇▅▄▅
Duration/Mean_val_ep_duration,▁▁▂▂▁▂▄▂▃▂▂▂▃▄▄▃▃▆▅▆▇▇▇▅▅▅▅▅▆▇▅▅█▅▅▅▅▅▅▅
Learning_rate/Actor,█▇▇▆▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▇▆▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▅█▅▆▆▇▅▆▄▅▇▅▅▆▄▅▃▂▃▃▂▃▃▁▁▂▆▃▃▂▃▃▁▁▄▁▅▃▃▂
Loss/Critic_loss,█▇▆▃▃▅▁▂▆▂▃▃▄▅▃▅▄▄▄▄▁▂▄▂▁▂▃▁▃▂▃▄▅▂▁▁▃▂▂▁
Loss/Entropy_bonus,█▅▅▃▄▄▄▄▆▆▅▆▅▆▅▆▅▅▄▅▄▄▃▁▁▂▃▂▁▂▁▃▃▂▃▁▃▃▂▂
Loss/KL_divergence,▆▆▅▂▁▃▄▁▅▃▆▃▃▃▄▄▃▄▂▃▃▃▃▄▂▃█▃▅▅▄▃▇▄▇▃▆▃▄▃
Loss/Policy_loss,▅█▅▆▆▇▅▆▄▅▇▅▅▆▄▅▃▂▃▃▂▃▃▁▁▂▆▃▃▂▃▃▁▁▄▁▅▃▃▂
Loss/Regularized_Actor_loss,▅█▅▆▆▇▅▆▄▅▇▅▅▅▄▅▃▂▃▃▂▃▃▁▁▂▆▃▃▂▃▃▁▁▃▁▅▃▃▂

0,1
Duration/Mean_train_ep_duration,131.33333
Duration/Mean_val_ep_duration,131.89999
Learning_rate/Actor,4e-05
Learning_rate/Critic,0.0
Loss/Actor_loss,-1.75793
Loss/Critic_loss,4.822
Loss/Entropy_bonus,0.58908
Loss/KL_divergence,-0.00914
Loss/Policy_loss,-1.75402
Loss/Regularized_Actor_loss,0.13069


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k9ou4bcn with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0006178173389342254
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.003042924238226867
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.018354509685286766
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9132888346855556
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.98572053816666

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.0006178173389342254, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.003042924238226867, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.018354509685286766, 'epochs': 10, 'exponential_factor': 0.9132888346855556, 'gamma': 0.95, 'hidden_sizes': [350, 350, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 2.985720538166668e-06, 'l2_factor': 3.2565996268418414e-05, 'lrelu': 0.1, 'minibatch_size': 64, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0006178173389342254,

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▃▄▃▃▄▄▄▃▄▃█▄▅▄▄▅▇▄▅█▄▇▇▇▄▄▆▅▆▆▇▆▇▅▆▄
Duration/Mean_val_ep_duration,▁▂▂▃▄▃▅▄▄▃▃▃▅▄▄▅▆▇█▆▇▇█▇▇▇▇▇▇▇▆▆▆▆▆▆▆▅▇▆
Learning_rate/Actor,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▆▅▄▄▄▄▄▅▃▃▃▃▃▁▂▁▂▃▂▁▂▂▁▃▂▁▂▃▃▂▂▂▂▂▃▁▃▃▃
Loss/Critic_loss,█▄▃▃▁▂▁▁▂▁▁▂▃▂▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂
Loss/Entropy_bonus,██▆▆▆▆▆▅▅▅▄▃▃▃▃▃▂▃▂▂▂▁▂▁▂▂▂▂▂▁▂▁▁▂▂▂▁▁▂▁
Loss/KL_divergence,▆▄▄▄▄▄▅▃▃▆▃▄▅▃▅▃▄▄▄▄▄▄▅▄▄▅▂▃█▃▃▃▄▄▄▃▁▄▅▄
Loss/Policy_loss,█▆▅▄▄▄▄▄▅▃▃▃▃▃▁▂▁▂▃▂▁▂▂▁▃▂▁▂▃▃▂▂▂▂▂▃▁▃▃▃
Loss/Regularized_Actor_loss,█▆▅▄▄▄▄▄▅▃▃▃▃▃▁▂▁▂▃▂▁▂▂▁▃▂▁▂▃▃▂▂▂▂▂▃▁▃▃▃

0,1
Duration/Mean_train_ep_duration,184.0
Duration/Mean_val_ep_duration,302.89999
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-7.32534
Loss/Critic_loss,4.88837
Loss/Entropy_bonus,0.97592
Loss/KL_divergence,0.00182
Loss/Policy_loss,-7.30743
Loss/Regularized_Actor_loss,-7.21806


[34m[1mwandb[0m: Agent Starting Run: iiott9cu with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0008667540373874765
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004633527609453167
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03280638492738754
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8998007601769701
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 350, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0009737354579177352
[34m[1mwandb[0m: 	l2_factor: 0.0004333729750596256
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.0008667540373874765, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004633527609453167, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03280638492738754, 'epochs': 10, 'exponential_factor': 0.8998007601769701, 'gamma': 0.95, 'hidden_sizes': [250, 150, 350, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0009737354579177352, 'l2_factor': 0.0004333729750596256, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 350, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.00086

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄▄▄▄▄▄▅▄▄▆▆▅▅▆▆▅█▆▆▆██▇▆▇▅▆▆▆▇▇▆▄▇▇▆▇▅▆
Duration/Mean_val_ep_duration,▁▃▄▅▇▄▄▅▄▆▅▅▇▅▆▆▆▇▇▇▇▆▆▇▇██▇▆▇▇▆▅█▇▇▇▇▆▇
Learning_rate/Actor,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆▄▇▃▅▄▆▅▅█▄▅▄▅▃▃▄▂▅▄▄▁▃▃▄▃▁▂▅▆▃▂▃▆▁▃▂▃▃▃
Loss/Critic_loss,█▂▇▄▂▄▁▇▃▆▃▇▆▃▂▁▃▁▂▂▄▃▅▃▁▂▂▁▃▃▃▁▁▄▄▁▃▁▄▂
Loss/Entropy_bonus,█▇▇▆▆▅▄▅▄▃▄▃▂▂▂▂▂▁▂▂▂▂▂▁▂▂▁▁▂▂▁▂▂▂▂▁▁▁▂▁
Loss/KL_divergence,▅▄▃▄▅▅▄▄▄█▇▅▆▆▅▄▆▄▅▄▄▄▄▃▅▃▄▃▃▆▄▄▅▁▃▄▄▃▂▄
Loss/Policy_loss,▆▄▇▃▅▄▆▅▅█▄▅▄▅▃▃▄▂▅▄▄▁▃▃▄▃▁▂▅▆▃▂▃▆▁▃▂▃▃▃
Loss/Regularized_Actor_loss,█▆▇▄▅▅▆▅▅█▄▅▄▅▃▃▄▂▅▃▄▁▃▃▄▂▁▂▅▅▃▂▃▆▁▃▂▃▃▃

0,1
Duration/Mean_train_ep_duration,93.8
Duration/Mean_val_ep_duration,101.3
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,1.90344
Loss/Critic_loss,3.66135
Loss/Entropy_bonus,0.62508
Loss/KL_divergence,-0.01041
Loss/Policy_loss,1.92395
Loss/Regularized_Actor_loss,9.19122


[34m[1mwandb[0m: Agent Starting Run: twevkyhp with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0026916496147356066
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0026668159874614827
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.003409201408147694
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9377253153912086
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.204621819182447e-06
[34m[1mwandb[0m: 	l2_factor: 0.0004910229248869546
[34m[1mwand

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.0026916496147356066, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0026668159874614827, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.003409201408147694, 'epochs': 10, 'exponential_factor': 0.9377253153912086, 'gamma': 0.9, 'hidden_sizes': [350, 150, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.204621819182447e-06, 'l2_factor': 0.0004910229248869546, 'lrelu': 0.01, 'minibatch_size': 64, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 350, 150], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▄▂▂▃▄▄▄▄▅▄▄▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄█▄▄▅▅▄
Duration/Mean_val_ep_duration,▁▁▂▆▇▂▃▇▇▇▇▇▆▇▆▇█▇▇▇▇█▇▇█████████▇█▇▇██▇
Learning_rate/Actor,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▇▄█▅▁█▇▄▁▁▂▂▂▂▂▃▂▂▃▃▂▃▁▂▂▂▂▂▂▂▂▁▃▁▂▁▂▂▁▂
Loss/Critic_loss,█▃▃▂▂▃▃█▂▂▁▁▁▁▂▃▁▂▂▃▁▂▁▁▁▂▁▂▁▁▁▁▁▁▃▁▂▁▁▂
Loss/Entropy_bonus,█▄▃▂▁▁▂▁▁▁▂▂▃▂▂▂▂▂▃▃▃▄▂▄▃▃▃▄▃▄▃▂▄▃▆▂▂▄▃▄
Loss/KL_divergence,█▇▅▁▅▅▃▆▅▄▅▅▆▅▃▅▄▅▅▅▄▅▅▃▃▄▅▅▅▃▅▄▃▃▃▄▄▄▄▄
Loss/Policy_loss,▇▄█▅▁█▇▄▁▁▂▂▂▂▂▃▂▂▃▃▂▃▁▂▂▂▂▂▂▂▂▁▃▁▂▁▂▂▁▂
Loss/Regularized_Actor_loss,▇▄█▅▁█▇▄▁▁▂▂▂▂▂▃▂▂▃▃▂▃▁▂▂▂▂▂▂▂▂▁▃▁▂▁▂▂▁▂

0,1
Duration/Mean_train_ep_duration,146.0
Duration/Mean_val_ep_duration,139.60001
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.5138
Loss/Critic_loss,2.95521
Loss/Entropy_bonus,0.51711
Loss/KL_divergence,0.01188
Loss/Policy_loss,-0.51203
Loss/Regularized_Actor_loss,-0.18413


[34m[1mwandb[0m: Agent Starting Run: dmzw3ifu with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0010790805637155964
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004275123415819555
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03000318740599483
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9227523336701128
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 5.167706531893088e-06
[34m[1mwandb[0m: 	l2_factor: 0.00022333691764515068
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0010790805637155964, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004275123415819555, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03000318740599483, 'epochs': 10, 'exponential_factor': 0.9227523336701128, 'gamma': 0.95, 'hidden_sizes': [250, 350, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 5.167706531893088e-06, 'l2_factor': 0.00022333691764515068, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': True, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00107908056

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▃▃▃▃▄▃▄▂▄▄▃▅▇▃▆▄▄▇▆▃▇▆▅▆▄▃▆▃▃▅▃▇▃█▇▇
Duration/Mean_val_ep_duration,▁▂▂▅▄▃▃▃▄▄▄▄▄▄▆▅▆▅▇▆▅▅▇▆█▆▆█▆█▅▄▅▆▇▆▇▅▆▆
Learning_rate/Actor,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▂▂▄▄▃▅▄▃▃▄▄▅▄▅▄█▅▄▄▇▇▄▆▄▅▄▆▄▃▆▄▄▅▇▅▇▅▅▆
Loss/Critic_loss,█▃▃▂▂▂▂▂▂▃▂▂▁▁▁▁▂▂▁▂▃▂▂▁▂▃▁▂▂▂▂▁▁▂▂▁▂▁▂▁
Loss/Entropy_bonus,█▆▆▅▄▅▄▄▄▄▄▄▃▄▂▂▂▂▃▂▁▂▂▃▁▂▃▁▂▂▁▂▁▁▂▁▂▁▂▂
Loss/KL_divergence,▅▄▅▄▆▄▄▅▄▆▆▅▄▅▆▆▆▆▆▆▄▆▆█▁▆▆▃▆▄▆▆▅▅▄▆▄▇▅▇
Loss/Policy_loss,▁▂▂▄▃▃▅▃▃▃▄▄▅▃▅▄█▅▄▄▇▇▄▆▃▅▄▆▄▃▆▄▄▄▇▄▇▅▄▆
Loss/Regularized_Actor_loss,▇▅▃▅▄▃▅▂▁▁▂▃▄▂▄▂█▄▂▃▇▇▂▅▂▄▃▅▃▁▅▂▂▃▇▃▆▃▃▅

0,1
Duration/Mean_val_ep_duration,119.6
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.00296
Loss/Critic_loss,2.92928
Loss/Entropy_bonus,1.3711
Loss/KL_divergence,0.0283
Loss/Policy_loss,0.0441
Loss/Regularized_Actor_loss,0.15829
Metric/Explained_variance,0.78099


[34m[1mwandb[0m: Agent Starting Run: 6xcdyn8t with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0034898427635440795
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.001294138141313056
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.022638663129059593
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8767868679753683
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 350, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.6854216818675228e-05
[34m[1mwandb[0m: 	l2_factor: 0.0007149183015068061
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.0034898427635440795, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.001294138141313056, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.022638663129059593, 'epochs': 10, 'exponential_factor': 0.8767868679753683, 'gamma': 0.99, 'hidden_sizes': [350, 250, 350, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 2.6854216818675228e-05, 'l2_factor': 0.0007149183015068061, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 350, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▆▇▆▆▇▇▆▇▆▇▇▇█▇█▇▇▇▇█▇█▇██▇█▇▇█▇▇▇▇▇██
Duration/Mean_val_ep_duration,▁▁▂▅▅▅▆▇▆▇▆▇▇▇▇█▇▇▇▇▇▇█▇▇▇█▇█▇██▇▇▇▇▇█▇▇
Learning_rate/Actor,█▆▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▁▃▃▂▁▁▂▃▃▃▃▃▃▃▃▂▂▃▃▂▃▃▃▃▃▃▃▃▂▃▃▃▃▃▂▂▃▃▃
Loss/Critic_loss,█▁▂▃▂▂▂▂▂▁▁▁▁▁▂▁▂▂▁▂▂▁▂▂▁▂▁▁▁▁▁▁▂▁▁▁▁▂▁▂
Loss/Entropy_bonus,▁▅▇▇▇████▆▇▆▆▅▆▆▅▆▆▅▆▅▅▅▆▅▅▅▅▅▅▆▅▅▄▅▄▆▅▅
Loss/KL_divergence,█▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▃
Loss/Policy_loss,█▁▃▄▃▂▂▂▃▃▃▃▃▃▃▄▃▃▃▃▃▃▃▃▃▃▃▃▃▂▃▃▃▃▃▂▂▃▃▃
Loss/Regularized_Actor_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,152.8
Duration/Mean_val_ep_duration,143.60001
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.01013
Loss/Critic_loss,6.61054
Loss/Entropy_bonus,0.64674
Loss/KL_divergence,0.00889
Loss/Policy_loss,0.02477
Loss/Regularized_Actor_loss,0.0328


[34m[1mwandb[0m: Agent Starting Run: e5y7lb6m with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.00029850049215536257
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.006136579764319283
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.028329409312531133
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9283635593197244
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 250, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 4.857720946196461e-06
[34m[1mwandb[0m: 	l2_factor: 0.0003143359583178352
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.00029850049215536257, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.006136579764319283, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.028329409312531133, 'epochs': 10, 'exponential_factor': 0.9283635593197244, 'gamma': 0.9, 'hidden_sizes': [350, 150, 250, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 4.857720946196461e-06, 'l2_factor': 0.0003143359583178352, 'lrelu': 0.01, 'minibatch_size': 64, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 250, 150], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.000298500

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▄▅▆▇█▇▇▇▆▇▆▆▆▆▇▆▆▆▇▅▅▇▇▇▇▆▇▆▅▆▆▇▆▅▆▇▆▆
Duration/Mean_val_ep_duration,▁▃▃▄▅▅▆███▆▇█▆▇▇█▇▅▇▆▆▆▆▄▇▆▆▆▇▇█▆▅▆▆▆▇▆▆
Learning_rate/Actor,█▇▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▅▄▄▄▄▄▂▂▃▂▃▃▅▁▃▅▂▂▃▄▂▅▁▄▃▅▃▁▃▃▄▅▃▅▆▃▂▅▄
Loss/Critic_loss,█▅▄▃▃▄▂▁▁▃▁▂▁▄▁▂▃▁▁▁▂▂▃▁▂▂▃▂▁▂▂▂▂▁▃▄▁▁▂▂
Loss/Entropy_bonus,█▇▅▃▄▃▃▂▂▂▂▃▂▃▂▂▂▂▂▂▂▁▁▂▁▂▂▁▁▁▁▂▂▂▂▁▁▁▁▂
Loss/KL_divergence,▆▆▅▇▄▆▆▄▆▅▄▅▅▅▄▅▄▄▅▄▄▄▃██▆▇▅▅▄▁▅█▄▅▃▅▄▅▃
Loss/Policy_loss,█▅▄▄▄▄▄▂▂▃▂▃▃▅▁▃▅▂▂▃▄▂▅▁▄▃▅▃▁▃▃▄▅▃▅▆▃▂▅▄
Loss/Regularized_Actor_loss,█▅▄▄▄▄▄▂▂▃▂▃▃▅▁▃▅▂▂▃▄▂▅▁▄▃▅▃▁▃▃▄▅▃▅▆▃▂▅▄

0,1
Duration/Mean_train_ep_duration,112.16666
Duration/Mean_val_ep_duration,116.9
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.96783
Loss/Critic_loss,2.33398
Loss/Entropy_bonus,0.9012
Loss/KL_divergence,-0.02939
Loss/Policy_loss,0.99336
Loss/Regularized_Actor_loss,1.63759


[34m[1mwandb[0m: Agent Starting Run: h23sj46y with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0001376294072645268
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.006174120243348213
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0158383897681048
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9374344812660268
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.0393560820023642e-06
[34m[1mwandb[0m: 	l2_factor: 6.979923562484843e-06
[34m[1mwandb[0m: 	lrelu: 0.01

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0001376294072645268, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.006174120243348213, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0158383897681048, 'epochs': 10, 'exponential_factor': 0.9374344812660268, 'gamma': 0.99, 'hidden_sizes': [250, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 1.0393560820023642e-06, 'l2_factor': 6.979923562484843e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.0001376294072645268, 'adv_std': F

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▄▅▃▅▄▆▆▄▅▆▅▅▆▅█▅█▅▆▇▆▆▆▆▅▆▆▅▇▇▇▅▆▆▇█▆
Duration/Mean_val_ep_duration,▁▁▂▅▅▄▄▄▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇█▇▆██▇▇▆█▆▇▆▇▆▇
Learning_rate/Actor,█▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▇▄▄▄▆▄▃▂▂▃▃▃▃▂▄▃▂▂▂▂▃▂▃▂▁▂▃▃▂▂▂▂▂▂▂▂▂▂▂
Loss/Critic_loss,█▇▆▄▃▅▃▄▃▃▃▃▂▃▃▃▃▂▃▁▃▄▁▃▂▂▂▄▃▃▂▃▂▂▂▁▃▂▂▂
Loss/Entropy_bonus,██▇▇▇▆▆▅▅▄▂▂▃▂▁▂▂▂▂▃▁▁▂▃▂▁▃▂▂▂▂▂▂▁▂▃▂▂▂▁
Loss/KL_divergence,▅▆▆█▅█▄▇▆▆▆▅▅▆▃▅▂▂▇▄▆▅▅▃▁▇█▄▄▆▄▅▇▆▆▅▄▄▅▇
Loss/Policy_loss,█▇▄▄▄▆▄▃▂▂▃▃▃▃▂▄▃▂▂▂▂▃▂▃▂▁▂▃▃▂▂▂▂▂▂▂▂▂▂▂
Loss/Regularized_Actor_loss,█▇▄▄▄▆▄▃▂▂▃▃▃▃▂▄▃▂▂▂▂▃▂▃▂▁▂▃▃▂▂▂▂▂▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,128.5
Duration/Mean_val_ep_duration,139.60001
Learning_rate/Actor,0.0
Learning_rate/Critic,2e-05
Loss/Actor_loss,-4.1071
Loss/Critic_loss,4.67224
Loss/Entropy_bonus,1.14499
Loss/KL_divergence,0.008
Loss/Policy_loss,-4.08897
Loss/Regularized_Actor_loss,-4.09153


[34m[1mwandb[0m: Agent Starting Run: sjbw0pa3 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0020942885165254714
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00020285666319313592
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.025547631458773035
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9568289807278793
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.6634105316797389e-06
[34m[1mwandb[0m: 	l2_factor: 0.0004606514402925785
[34m[1mwand

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.0020942885165254714, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00020285666319313592, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.025547631458773035, 'epochs': 10, 'exponential_factor': 0.9568289807278793, 'gamma': 0.99, 'hidden_sizes': [350, 350, 150, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.6634105316797389e-06, 'l2_factor': 0.0004606514402925785, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▃▄▅▅▅▆▇▆█▆▆▆▅▅▆▆▆▆██▆▆▆▆▆█▇▇▆▇▇▇▇▇▇▇
Duration/Mean_val_ep_duration,▁▁▁▁▂▄▄▅▅▆▅▆▅▅▅▅▄▄▅▄▅▆▆█▇▅▅▅▅▆▆▇▆▆▅▆▆▇▆▅
Learning_rate/Actor,██▇▇▆▆▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▂▁▂▂▂█▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▄▂▂▂▂▂▂▂▂▂▂▁▂▂
Loss/Critic_loss,█▂▁▂▂▃▂▂▂▂▂▁▁▂▂▁▂▂▁▁▁▂▁▂▂▂▁▁▁▁▁▁▂▁▁▁▁▂▂▁
Loss/Entropy_bonus,▁▁▅▇▆█▇▇▇▇███▇▇▇██▆▇▆▅▆▇▄▄▄▄▄▅▄▆▅▅▆▆▄▅▅▇
Loss/KL_divergence,█▃▁▃▄▄▄▄▄▄▄▄▄▃▃▄▄▃▄▄▄▄▄▃▃▃▄▃▃▄▄▃▃▃▄▃▄▃▃▃
Loss/Policy_loss,█▂▁▂▂▂█▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▄▂▂▂▂▂▂▂▂▂▂▁▂▂
Loss/Regularized_Actor_loss,█▂▁▁▂▁▅▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,157.33333
Duration/Mean_val_ep_duration,137.5
Learning_rate/Actor,7e-05
Learning_rate/Critic,1e-05
Loss/Actor_loss,-0.01754
Loss/Critic_loss,3.35752
Loss/Entropy_bonus,0.83468
Loss/KL_divergence,0.00153
Loss/Policy_loss,0.00378
Loss/Regularized_Actor_loss,-0.00469


[34m[1mwandb[0m: Agent Starting Run: vxs1o6u8 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0006994883482315607
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0011522613403651585
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03870003682400877
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9277244387698328
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 250, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.9068537515180694e-05
[34m[1mwandb[0m: 	l2_factor: 1.2661674277837051e-06
[34m[1mwan

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.0006994883482315607, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0011522613403651585, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03870003682400877, 'epochs': 10, 'exponential_factor': 0.9277244387698328, 'gamma': 0.99, 'hidden_sizes': [350, 250, 250, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.9068537515180694e-05, 'l2_factor': 1.2661674277837051e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 250, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr'

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▃▄▇▄▄▆▇▇▄▄▇▆▅▅▅█▇▇▇▅▆▅▅▆▆▇▆▅▆▇▅▆▆▅█▅▆▆
Duration/Mean_val_ep_duration,▁▁▂▆█▃▅▆▆▇▇▆▅▆▆▅▆▇▇▇▇▆▇▆▇▆▆▆▆▅▅▇▆▇▆▆▆▇▇▅
Learning_rate/Actor,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▄▅▃▆▆▆▅▅▄▅▆▅▆▅▆▅▆▇▆▆▇▆▅▅▆▅▇▇██▆█▆▆▇▆▆▆▅
Loss/Critic_loss,██▅▄▃▆▇▃▄▂▃▃▄▃▂▆▅▁▂▄▂▇▄▃▄▂▃▂▂▄▁▂▄▄▃▂▃▃▅▅
Loss/Entropy_bonus,▆█▆██▆▅▅▃▄▁▃▅▃▃▄▅▃▄▅▆▃▂▄▃▄▆▄▂▃▃▄▂▃▆▂▆▃▃▃
Loss/KL_divergence,▁▅▅▇▇▆▆▆▆▇█▆▆▆▆▆▇▇▆▇▆▆▆▆▆▆▆▆▆▆▆▆▇▆▆▆▆▇▆▆
Loss/Policy_loss,▁▄▅▄▆▆▆▅▄▄▄▆▅▆▅▆▅▆▇▆▆▇▆▅▅▆▅▇▇██▆█▆▆▇▇▆▆▅
Loss/Regularized_Actor_loss,█▇▆▃▄▄▄▂▂▁▂▃▂▃▂▃▁▃▄▂▂▃▃▂▂▂▂▃▄▄▄▂▄▂▂▃▃▃▂▁

0,1
Duration/Mean_train_ep_duration,150.0
Duration/Mean_val_ep_duration,131.7
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.07412
Loss/Critic_loss,6.15334
Loss/Entropy_bonus,1.30448
Loss/KL_divergence,-0.00325
Loss/Policy_loss,-0.02363
Loss/Regularized_Actor_loss,0.03098


[34m[1mwandb[0m: Agent Starting Run: 0l52eqp5 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0008256405505095168
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.008172994437793124
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.040982293675815976
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8800705677263317
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 350, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00010693167898945396
[34m[1mwandb[0m: 	l2_factor: 6.186701203809096e-06
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0008256405505095168, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.008172994437793124, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.040982293675815976, 'epochs': 10, 'exponential_factor': 0.8800705677263317, 'gamma': 0.95, 'hidden_sizes': [350, 150, 350, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.00010693167898945396, 'l2_factor': 6.186701203809096e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 350, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,█▁▄▁▅▅▅▅▅▅▅▅▅▄▅▄▅▅▅▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Duration/Mean_val_ep_duration,█▂▁▄▄▄▄▅▄▄▅▄▄▄▄▄▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Learning_rate/Actor,█▇▆▆▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▃▄▅▃▃▄▄▃▄▄▃▄▃▂▄▃▅▄▅▃▅▃▃▃▃▃▃▃▃▃█▃▃▂▃▂▃▃▆
Loss/Critic_loss,█▃▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▅▄▂▃▁▁▁▁▁▁▁▁▂▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▁▂▂
Loss/KL_divergence,▆█▇█▅▅▅▅▅▅▅▅▅▅▂▅▁▄▃▅▄▅▄▃▃▃▃▄▃▃▄▅▄▃▃▄▃▄▄▄
Loss/Policy_loss,▁▃▄▅▃▃▄▄▃▄▄▃▄▃▂▄▃▅▄▅▃▅▃▃▃▃▃▃▃▃▃█▃▃▂▃▂▃▃▆
Loss/Regularized_Actor_loss,▁▃▄▅▃▃▄▄▃▄▄▃▄▃▂▄▃▅▄▅▃▅▃▃▃▃▃▃▃▃▃█▃▃▂▃▂▃▃▆

0,1
Duration/Mean_train_ep_duration,35.0
Duration/Mean_val_ep_duration,34.9
Learning_rate/Actor,0.0
Learning_rate/Critic,1e-05
Loss/Actor_loss,55.7067
Loss/Critic_loss,3.01106
Loss/Entropy_bonus,0.14717
Loss/KL_divergence,0.0042
Loss/Policy_loss,55.71273
Loss/Regularized_Actor_loss,57.18271


[34m[1mwandb[0m: Agent Starting Run: 888ig7z0 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.006429814583048769
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0008533310564093102
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.010441801207681848
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8933222335516829
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.952362284990052e-06
[34m[1mwandb[0m: 	l2_factor: 1.912956921632254e-05
[34m[1mwandb[0m: 	lr

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.006429814583048769, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0008533310564093102, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.010441801207681848, 'epochs': 10, 'exponential_factor': 0.8933222335516829, 'gamma': 0.9, 'hidden_sizes': [150, 250, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.952362284990052e-06, 'l2_factor': 1.912956921632254e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.0064298145830487

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,█▄▅▃▁▂▃▄▂▂▂▂▃▃▃▃▃▃▂▂▂▂▃▃▃▃▃▂▃▃▃▃▃▃▃▂▃▂▃▃
Duration/Mean_val_ep_duration,█▁▅▅▁▄▇▄▅▅▅▅▅▅▅▆▅▅▅▅▆▅▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Learning_rate/Actor,█▇▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▇▂▄▄█▅▂▃▃▅▃▂▂▄▅▄▄▁▂▄▁▃▂▃▄▃▄▂▂▃▄▁▂▂▂▂▃▄▂▇
Loss/Critic_loss,▁▄▃▃█▄▅▅▅▆▄▂▃▆▅▄▅▂▃▃▂▄▄▃▅▅▅▃▅▅▆▃▄▃▄▃▄▄▄▅
Loss/Entropy_bonus,█▄▂▁▇▃▃▄▂▃▃▄▄▂▃▂▃▃▂▂▂▂▃▄▁▂▂▂▃▃▂▃▁▃▄▂▂▄▃▄
Loss/KL_divergence,█▅▇▁▅▅▅▅▆▄▅▅▃▅▅▅▅▅▆▄▄▅▅▅▅▅▅▅▅█▅▆▅▅▅▅▅▄▅▆
Loss/Policy_loss,▇▂▄▄█▅▂▃▃▅▃▂▂▄▅▄▄▁▂▄▁▃▂▃▄▃▄▂▂▃▄▁▂▂▂▂▃▄▂▇
Loss/Regularized_Actor_loss,▇▂▄▄█▅▂▃▃▅▃▂▂▄▅▄▄▁▂▄▁▃▂▃▄▃▄▂▂▃▄▁▂▂▂▂▃▄▂▇

0,1
Duration/Mean_train_ep_duration,65.5
Duration/Mean_val_ep_duration,65.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,14.44627
Loss/Critic_loss,8.84784
Loss/Entropy_bonus,0.49195
Loss/KL_divergence,0.03941
Loss/Policy_loss,14.45141
Loss/Regularized_Actor_loss,14.51219


[34m[1mwandb[0m: Agent Starting Run: d5fvt26y with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00021015886374918795
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0006674442257353529
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.030437637850713264
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9293287859460988
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.000701728362533609
[34m[1mwandb[0m: 	l2_factor: 4.95868537917224e-06
[34m[1mwandb[0m

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.00021015886374918795, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0006674442257353529, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.030437637850713264, 'epochs': 10, 'exponential_factor': 0.9293287859460988, 'gamma': 0.99, 'hidden_sizes': [150, 350, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.000701728362533609, 'l2_factor': 4.95868537917224e-06, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 150, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0002

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▁▁▃▂▁▂▃▃▆▂▃▃▂▇▇▃▆▂▄▃▂▃▅▃▇▂▄▄▆▅▆▅▄█▅▄▅▃▇
Duration/Mean_val_ep_duration,▃▁▃▃▆▂▄▄▆▅▆▆▆▃▃█▆▆▆█▆█▅▇▇▇▅▅▅▆▆▆▅▄▆▅▃▇▆▆
Learning_rate/Actor,██▇▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,███▆▃▆▅▄▃▃▃▂▅▃▃▁▄▃▂▅▄▅▄▄▇▁▃▄▃▃▄▃▃▆▂▄▅▅▅▃
Loss/Critic_loss,▃▇█▆▃▄▄▃▃▃▃▂▃▂▂▁▃▂▂▃▃▃▂▃▄▂▃▂▂▂▂▂▁▃▁▃▃▃▃▂
Loss/Entropy_bonus,██▆▆▅▅▄▄▃▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▂▂▂▁▁
Loss/KL_divergence,▂▃▆▆▆▆▆▆▅▇▇▆▄▇▅▆▆▅▆▆▆▆▅▇▄▆█▃▅▄█▄▄▆█▁▄▄▆▄
Loss/Policy_loss,███▆▃▆▅▄▃▃▃▂▅▃▃▁▄▃▂▅▄▅▄▄▇▁▃▄▃▃▄▃▃▆▂▄▅▅▅▃
Loss/Regularized_Actor_loss,███▆▃▆▅▄▃▃▃▂▅▂▃▁▄▃▂▅▄▅▄▄▇▁▃▄▃▃▄▃▃▆▂▄▅▅▅▃

0,1
Duration/Mean_train_ep_duration,85.0
Duration/Mean_val_ep_duration,49.0
Learning_rate/Actor,0.0
Learning_rate/Critic,1e-05
Loss/Actor_loss,15.74786
Loss/Critic_loss,9.64008
Loss/Entropy_bonus,1.71515
Loss/KL_divergence,0.00032
Loss/Policy_loss,15.80007
Loss/Regularized_Actor_loss,22.80404


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u9yo6ju0 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.003934005060273262
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0015451733208604029
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.044706776844353834
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9852998069454576
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 350, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 9.018529341

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.003934005060273262, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0015451733208604029, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.044706776844353834, 'epochs': 10, 'exponential_factor': 0.9852998069454576, 'gamma': 0.95, 'hidden_sizes': [150, 350, 350, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 9.018529341172836e-05, 'l2_factor': 3.524715820736802e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 350, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.003934

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▂▁▃▃▅█▅▄▄▄▄▂▃█▄▆▄▃▇▃▂▇▄▃▅▃▅▄▅▄▃▅▄▅▅▅▅
Duration/Mean_val_ep_duration,▂▁▁▃▁▃▃▃▅▇▅█▆▄▃▄▇▅▃▅▅▅▄▂▄▅▇▆▆▄▆▇▅▆▅▅▅▅▄▇
Learning_rate/Actor,██▇▇▇▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆▆█▄▅▄▃▆▄▄▂▃▂▃▄▁▂▃▁▁▃▃▂▂▅▂▂▃▃▃▁▄▂▂▃▄▃▃▃▄
Loss/Critic_loss,▆▅▃█▄▃▆▃█▅▂▄▆▃▃▁▃▃▄▁▃▂▄▂▂▃▅▄▅▄▂▃▂▂▂▃▁▂▁▂
Loss/Entropy_bonus,█▇▇▆▄▁▂▃▃▅▅▇▇▇▇█████▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇
Loss/KL_divergence,█▅▆▆▆▆▆▆▆▅▁▄▅▁▅▅▄▅▅▅▅▂▅▅▅▄▅▅▅▅▃▅▆▅▄▆▅▃▅▄
Loss/Policy_loss,▆▆█▃▄▃▁▅▃▄▁▃▂▃▄▁▂▃▁▁▂▂▂▂▅▂▂▂▂▃▁▄▂▂▃▄▃▃▃▄
Loss/Regularized_Actor_loss,█▆▄▃▄▄▃▃▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁

0,1
Duration/Mean_train_ep_duration,129.42857
Duration/Mean_val_ep_duration,75.7
Learning_rate/Actor,0.00047
Learning_rate/Critic,0.00018
Loss/Actor_loss,-0.0254
Loss/Critic_loss,3.24129
Loss/Entropy_bonus,1.40171
Loss/KL_divergence,0.01437
Loss/Policy_loss,0.03727
Loss/Regularized_Actor_loss,-0.00289


[34m[1mwandb[0m: Agent Starting Run: lzcf9f0t with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0009336196748325956
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0008705695480191021
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.036162752327663354
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8567893872355051
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.954866391921722e-05
[34m[1mwandb[0m: 	l2_factor: 4.909268383712707e-06
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0009336196748325956, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0008705695480191021, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.036162752327663354, 'epochs': 10, 'exponential_factor': 0.8567893872355051, 'gamma': 0.9, 'hidden_sizes': [250, 350, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 1.954866391921722e-05, 'l2_factor': 4.909268383712707e-06, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 150, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00093361

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▄▂▂▄▂▂▁▃▂▄▁▃▁▂▄▂▂▃▁█▂▃▃▂▁▃▂▂▂▂
Duration/Mean_val_ep_duration,▆▅▄▄▄▄▄▃▅▂▄▂▅▂▁▂▄▂▃▂▂▃▃▃▄▁▄█▇▄▅▃
Learning_rate/Actor,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▂▂▃▃▂▄▃▁▂▁▂▅▂▄▃▁▃▄▃▃▁▃▃▃▃▆▂▃▃▄▄
Loss/Critic_loss,█▂▂▃▃▂▄▃▁▂▂▂▅▂▄▄▁▃▄▃▃▁▃▃▃▃▆▂▃▃▄▄
Loss/Entropy_bonus,█▅▅▄▃▁▁▂▃▁▂▂▂▁▃▂▁▁▂▃▃▂▁▁▃▂▃▃▂▃▂▁
Loss/KL_divergence,▁▁▂▂▅▆▅▄▅█▆▆▅▆▄▄█▆▆▄▆▇▅▆▆▅▅▄▆▆▇▆
Loss/Policy_loss,█▂▂▃▃▂▄▃▁▂▁▂▅▂▄▃▁▃▄▃▃▁▃▃▃▃▆▂▃▃▄▄
Loss/Regularized_Actor_loss,█▂▂▃▃▂▄▃▁▂▁▂▅▂▄▃▁▃▄▃▃▁▃▃▃▃▆▂▃▃▄▄

0,1
Duration/Mean_train_ep_duration,19.2
Duration/Mean_val_ep_duration,26.8
Learning_rate/Actor,1e-05
Learning_rate/Critic,1e-05
Loss/Actor_loss,15.30535
Loss/Critic_loss,17.31157
Loss/Entropy_bonus,1.59618
Loss/KL_divergence,0.01963
Loss/Policy_loss,15.36307
Loss/Regularized_Actor_loss,15.56474


[34m[1mwandb[0m: Agent Starting Run: ob4fi55n with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0006981809486659926
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0006518018015069512
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.015714466721123286
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9417909361686218
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.0537797963198855e-06
[34m[1mwandb[0m: 	l2_factor: 0.00012439713588152478
[34m[1mwandb[0m: 	l

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0006981809486659926, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0006518018015069512, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.015714466721123286, 'epochs': 10, 'exponential_factor': 0.9417909361686218, 'gamma': 0.95, 'hidden_sizes': [250, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.0537797963198855e-06, 'l2_factor': 0.00012439713588152478, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0006981809486659

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▃▃▃▃▅▃▄▆▅▇▄▆▆▇▇█▇▇█▆▇▇▅▇▇▇▆▄▇▇▇▆▇▇▄▅▆▇
Duration/Mean_val_ep_duration,▁▂▂▂▄▄▅▅▄▆▆▆▅▇▇███▆▇▇▇▇██▇███▇██▇▇▇▇▆██▇
Learning_rate/Actor,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▃▄▅▄▄▆▅▇▇▅▆▅▆▆▇█▆▅▆▆▅▇▇▆▆▆▆▅▇▆▇▆▄▆▆▆▇▇▅
Loss/Critic_loss,█▅▃▃▁▃▃▂▂▂▃▃▃▂▂▂▃▁▁▂▁▁▂▁▁▁▂▂▁▁▁▂▁▁▁▁▁▂▃▁
Loss/Entropy_bonus,█▆▆▆▅▅▅▃▄▃▂▂▂▁▂▁▁▁▁▁▂▂▂▁▁▂▂▂▂▂▂▂▃▂▂▂▂▁▁▂
Loss/KL_divergence,▅▄▄▅▅▅▅▆▅▅▄▅▅▅▃▂▄▅▃▆▆▄▆▅▄▅▆▆▆▄▆▆▇█▅▆▅▃▅▁
Loss/Policy_loss,▁▃▄▅▄▄▇▅▇▇▅▆▅▆▅▆█▆▄▆▆▅▇▆▆▅▆▆▅▇▆▇▆▄▆▆▆▇▇▄
Loss/Regularized_Actor_loss,▁▃▄▅▄▃▆▅▇▇▅▆▄▆▅▆█▆▄▆▆▅▇▆▆▅▆▆▅▇▆▇▅▄▆▆▅▇▇▄

0,1
Duration/Mean_train_ep_duration,114.0
Duration/Mean_val_ep_duration,112.6
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.043
Loss/Critic_loss,6.38792
Loss/Entropy_bonus,1.0601
Loss/KL_divergence,-0.02632
Loss/Policy_loss,-0.02634
Loss/Regularized_Actor_loss,0.03112


[34m[1mwandb[0m: Agent Starting Run: ndeduizn with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0005587974441550501
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0017736436086005407
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.011866306267991716
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9186584113056208
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 250, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 8.110965714136781e-05
[34m[1mwandb[0m: 	l2_factor: 5.354898250252507e-05
[34m[1mwandb[0m

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.0005587974441550501, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0017736436086005407, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.011866306267991716, 'epochs': 10, 'exponential_factor': 0.9186584113056208, 'gamma': 0.99, 'hidden_sizes': [150, 350, 250, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 8.110965714136781e-05, 'l2_factor': 5.354898250252507e-05, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 250, 250], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.0005587

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▄▅▅▅▅▅▅▅▅▆▄▅▆▅▅▆▅▅▇█▆▆▅▅▄▇▄▅▅▄▅▇▅▅▆▅▆▅
Duration/Mean_val_ep_duration,▂▁▅▇▆▆▅▆▅▅▆█▅▇▆▇▇▇▆▇▆▆▇▆▆█▇▇█▆▆█▆▅▆▆█▇▆▅
Learning_rate/Actor,█▇▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▄▄▄▄▄▃▃▃▃▃▂▃▃▂▄▄▃▄▄▂▁▃▃▃▄▃▃▃▃▃▃▃▁▃▂▃▃▃▃
Loss/Critic_loss,█▂▁▁▂▁▂▂▂▂▁▂▁▂▃▁▁▂▁▁▃▃▂▂▂▁▂▂▂▂▂▂▁▃▁▂▂▂▂▂
Loss/Entropy_bonus,█▇▄▄▄▃▂▂▂▂▂▂▁▁▂▁▁▁▂▁▂▂▁▂▂▁▂▂▁▂▂▂▁▂▂▁▁▂▂▂
Loss/KL_divergence,▆▃▅▂▃▂▂▆▅▄▄▄▄▅▂▂▂▃▅▁█▄▇▄▃▃▄▅▃▄▆▄▃▃▅▄▄▃▄▅
Loss/Policy_loss,█▄▄▄▄▄▃▃▃▃▃▂▃▃▂▄▄▃▄▄▂▁▃▃▃▄▃▃▃▃▃▃▃▁▃▂▃▃▃▃
Loss/Regularized_Actor_loss,█▄▄▄▄▄▃▃▃▃▃▂▃▃▂▄▄▃▄▄▂▁▃▃▃▄▃▃▃▃▃▃▃▁▃▂▃▃▃▃

0,1
Duration/Mean_train_ep_duration,80.0
Duration/Mean_val_ep_duration,75.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,15.88011
Loss/Critic_loss,9.5771
Loss/Entropy_bonus,1.22843
Loss/KL_divergence,-0.0071
Loss/Policy_loss,15.89468
Loss/Regularized_Actor_loss,17.21081


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: io70i6ce with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00012809112872037663
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.003575056040506533
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.015013566685078274
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9132641855358214
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.00012809112872037663, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.003575056040506533, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.015013566685078274, 'epochs': 10, 'exponential_factor': 0.9132641855358214, 'gamma': 0.9, 'hidden_sizes': [350, 250, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.00013389786062526236, 'l2_factor': 6.133265095881513e-06, 'lrelu': 0.001, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 350, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr'

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▂▂▁▂▃▂▃▇▂▃▂▃▃▃▅▃▂▂▃▂▃▂█▂▄▃▃▃▃▂▃▃▄▄▂▄▃
Duration/Mean_val_ep_duration,▂▄▅▃▅▁▃▆▆▂▆▆▅▅▄▅▃▄▅▅▄▆▄▄▅▅▆▆▄█▆▄▄▅▅▅▅▆▆▃
Learning_rate/Actor,█▇▆▅▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▁▂▂▃▂▂▂▃▃▃▂▂▅▃▃▂▄▄▂▂▃▃▃▄▄▃▄▂▅▃▂▃▂▃▄▃█▄▄
Loss/Critic_loss,█▆▅▂▃▂▃▂▃▂▂▂▂▃▁▂▂▂▃▂▂▂▁▁▂▂▂▂▁▂▁▂▁▂▁▂▁▂▁▃
Loss/Entropy_bonus,██▆▆▅▇▄▇▆▅▅▄▁▃▅▅▅▅▄▄▃▃▃▂▃▅▁▂▂▃▂▅▂▂▂▄▅▂▂▃
Loss/KL_divergence,▃▃▅▇█▂▇▄▆▄▁▃▂▄▃▃▃▂▃▅▄▁▄▃▄▃▃▃▃▄▅▄▄▄▅▃▂▁▇▃
Loss/Policy_loss,▁▁▂▂▃▂▂▂▃▃▃▁▂▅▃▃▂▄▄▂▂▃▃▃▄▄▃▄▂▅▃▂▃▂▃▄▃█▄▄
Loss/Regularized_Actor_loss,█▇▆▅▅▄▃▃▃▃▂▂▂▃▂▂▁▂▂▁▁▂▁▁▂▂▂▂▁▃▂▁▂▁▁▂▁▄▂▂

0,1
Duration/Mean_train_ep_duration,45.0
Duration/Mean_val_ep_duration,40.7
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.00248
Loss/Critic_loss,11.06551
Loss/Entropy_bonus,1.62145
Loss/KL_divergence,0.00184
Loss/Policy_loss,0.02682
Loss/Regularized_Actor_loss,1.48768


[34m[1mwandb[0m: Agent Starting Run: 0veddcwe with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.002853767532136351
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00017635712455395084
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.00971167478621916
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8716858916887438
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.1056687735888173e-05
[34m[1mwandb[0m: 	l2_factor: 5.051633332215996e-06
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.002853767532136351, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00017635712455395084, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.00971167478621916, 'epochs': 10, 'exponential_factor': 0.8716858916887438, 'gamma': 0.95, 'hidden_sizes': [350, 150, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.1056687735888173e-05, 'l2_factor': 5.051633332215996e-06, 'lrelu': 0.1, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.002853767532

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▅▆▃▅▄▃▄▃▅▅▄▇▄▅▄▄▄▄▄█▄▄▄▃▃
Duration/Mean_val_ep_duration,▁▁▁▁▁▂▂▂▂▂▂▃▃▅▆▆▄▄▇▄▇▆▆█▅▇▆█▇▇▆▇▅▇▇▅▇▆▆▆
Learning_rate/Actor,█▇▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆█▅▁▁▄▂▂▂▃▃▂▃▂▃▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Loss/Critic_loss,█▇▃▃▂▄▃▃▄▃▃▃▃▂▁▁▃▂▂▂▂▂▁▂▂▁▂▂▁▂▂▂▂▂▂▂▂▂▂▂
Loss/Entropy_bonus,▅▇▅▇▇█▇▇█▇▇▇▆▇▄▄▅▆▆▆▄▄▃▃▄▂▅▄▁▅▅▅▆▆▁▅▅▅▆▃
Loss/KL_divergence,█▅▄▁▁▃▃▃▂▂▃▃▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Policy_loss,▆█▅▁▁▄▂▂▂▃▃▂▃▂▃▂▂▃▃▃▃▂▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Loss/Regularized_Actor_loss,▆█▅▁▁▃▂▂▁▂▃▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,99.25
Duration/Mean_val_ep_duration,150.8
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.0051
Loss/Critic_loss,5.68786
Loss/Entropy_bonus,0.53521
Loss/KL_divergence,8e-05
Loss/Policy_loss,0.0001
Loss/Regularized_Actor_loss,0.0589


[34m[1mwandb[0m: Agent Starting Run: quzgahaa with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0002764543275882708
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00016048265292976295
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0013071406050925148
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9373557153924592
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.637148708139512e-05
[34m[1mwandb[0m: 	l2_factor: 0.0001265984072527381
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0002764543275882708, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00016048265292976295, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0013071406050925148, 'epochs': 10, 'exponential_factor': 0.9373557153924592, 'gamma': 0.99, 'hidden_sizes': [350, 350, 150, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.637148708139512e-05, 'l2_factor': 0.0001265984072527381, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.99, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▃▄▃▂▂▃▄▃▄▅▅▄▃▄▄▇█▅▆▆▅▆▄▄▄█▄█▆▄█▆▄▄▇▄▅▃
Duration/Mean_val_ep_duration,▁▃▃▂▄▃▃▄▄▄▄▄▄▄▅▆█▇▅▇▅▄▆▅▅▆▅▇▇▆▆▅▆▆▆▆▇▆▆▄
Learning_rate/Actor,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▃▄▅▅▆▃▆▅▄▅▆▇▆▇▄▅▆▇▇▇▇██████████████████
Loss/Critic_loss,█▅█▅▃▂▅▃▂▃▃▁▂▂▂▅▂▃▃▂▂▂▃▂▂▃▃▂▄▄▂▃▃▂▃▃▂▄▃▃
Loss/Entropy_bonus,█▇▇▆▆▅▄▃▃▄▅▃▂▂▂▄▄▃▃▃▃▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▁▁
Loss/KL_divergence,▇▆▁▆▄███▄▇▄▆▅▇▄▁▃▅▅▅▅▅▄▄▅▅▅▅▅▅▅▅▅▄▅▅▅▅▅▅
Loss/Policy_loss,▁▃▄▅▅▆▃▆▅▄▅▆▇▆▇▄▅▆▇▇▇▇██████████████████
Loss/Regularized_Actor_loss,█▇▅▅▄▄▂▃▂▂▂▂▃▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,48.0
Duration/Mean_val_ep_duration,67.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.00168
Loss/Critic_loss,12.96304
Loss/Entropy_bonus,1.23171
Loss/KL_divergence,-2e-05
Loss/Policy_loss,-7e-05
Loss/Regularized_Actor_loss,0.38096


[34m[1mwandb[0m: Agent Starting Run: jgzgp0eb with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.007593290564047245
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.000585812330053055
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.02955210171667238
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8696843120194657
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 150, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00020274076099870337
[34m[1mwandb[0m: 	l2_factor: 0.0007922702192707737
[34m[1mwandb[0m: 	

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.007593290564047245, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.000585812330053055, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.02955210171667238, 'epochs': 10, 'exponential_factor': 0.8696843120194657, 'gamma': 0.95, 'hidden_sizes': [250, 150, 150, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.00020274076099870337, 'l2_factor': 0.0007922702192707737, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 150, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.007593290564

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▁▂▁▂▃▂▄▂▃▇▇▆▄▄▇▆▅▇▅▆▆▄▄▅▆▅▄▅█▄█▅▆█▇▅▄
Duration/Mean_val_ep_duration,▁▂▂▂▂▂▃▄▂▃▃▅▄▆▄▃▄▆▅▅▇▇▅▅▄▅▇█▆▆▅▇▆▇▆▆▆▇▆▆
Learning_rate/Actor,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▅▂▃▃▃▃▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▄▆▄▅▅▆▆▄▇█▆▅▅▅▅▆▆▇▄
Loss/Critic_loss,██▅▅▅▃▄▂▃▂▃▁▂▂▃▃▂▂▂▃▃▃▄▅▃▃▃▃▂▃▄▃▃▃▃▃▃▃▂▃
Loss/Entropy_bonus,▆▁▅▆▇█▇███▇██▇▇▇▇▆▆▇▆▆▆▇▇▆▆▆▆▆▆▆▆▆▆▆▇▆▆▆
Loss/KL_divergence,█▄▃▄▄▄▅▃▃▃▄▃▄▄▂▃▄▄▂▄▂▄▂▁▃▂▃▂▄▃▂▄▃▃▄▃▂▂▂▃
Loss/Policy_loss,▅▂▃▃▃▃▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▄▆▄▅▅▆▆▄▇█▆▅▅▅▅▆▅▇▄
Loss/Regularized_Actor_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,74.0
Duration/Mean_val_ep_duration,135.5
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.06378
Loss/Critic_loss,8.75512
Loss/Entropy_bonus,1.56875
Loss/KL_divergence,0.00277
Loss/Policy_loss,-0.01742
Loss/Regularized_Actor_loss,0.07313


[34m[1mwandb[0m: Agent Starting Run: 8d899l89 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.000773587896793357
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0001857073176170235
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.004503858073374783
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8915724430608235
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 350, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0003356633894007055
[34m[1mwandb[0m: 	l2_factor: 0.00023942858396834625
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.000773587896793357, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0001857073176170235, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.004503858073374783, 'epochs': 10, 'exponential_factor': 0.8915724430608235, 'gamma': 0.95, 'hidden_sizes': [350, 250, 350, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0003356633894007055, 'l2_factor': 0.00023942858396834625, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 350, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃▆▃▆▄▅▄▄▆▅▅▄▃▄▅▅▆▄▅▅▃▃▅▇█
Duration/Mean_val_ep_duration,▁▂▃▃▃▃▃▃▃▄▄▄▃▅▅▅▆▅▆▇▆▇▆▆▃▇▆▇▅▆▅▆▆▇▆▆▅▅▆█
Learning_rate/Actor,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,██▄▄▃▅▄▄▄▃▄▃▅▄▂▄▁▃▁▁▁▂▃▁▂▂▂▂▂▃▂▂▂▁▂▂▂▁▁▁
Loss/Critic_loss,█▆▅▃▂▂▁▂▂▁▂▁▄▃▂▃▂▄▂▂▂▂▃▂▃▃▃▃▃▄▃▃▄▃▃▃▃▂▃▃
Loss/Entropy_bonus,█▇▅▃▂▂▂▁▁▁▁▁▂▂▁▁▂▁▂▁▁▁▁▂▁▁▁▁▁▂▁▂▁▁▂▂▂▁▁▁
Loss/KL_divergence,▄▅█▄▇▂▁▃▂▂▂▄▂▃▃▃▂▂▁▂▃▃▂▂▂▃▁▂▃▁▁▂▂▂▂▃▁▃▁▃
Loss/Policy_loss,██▄▄▃▅▄▄▄▃▄▃▅▄▂▄▁▃▁▁▁▂▂▁▂▂▂▂▂▃▂▂▂▁▂▂▂▁▁▁
Loss/Regularized_Actor_loss,██▄▄▃▅▄▄▄▃▃▃▅▄▂▄▁▃▁▁▁▂▂▁▂▂▂▂▂▃▂▂▂▁▂▂▂▁▁▁

0,1
Duration/Mean_train_ep_duration,429.5
Duration/Mean_val_ep_duration,341.70001
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-8.75909
Loss/Critic_loss,4.6331
Loss/Entropy_bonus,0.24992
Loss/KL_divergence,0.00036
Loss/Policy_loss,-8.75797
Loss/Regularized_Actor_loss,-3.47719


[34m[1mwandb[0m: Agent Starting Run: zc1440ux with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0028143823525120857
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00832217380126745
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.005120707977174764
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9384941383346692
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 150, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 4.826149665716156e-06
[34m[1mwandb[0m: 	l2_factor: 0.00022692453063343488
[34m[1mwandb[0m: 	l

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0028143823525120857, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00832217380126745, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.005120707977174764, 'epochs': 10, 'exponential_factor': 0.9384941383346692, 'gamma': 0.9, 'hidden_sizes': [150, 250, 150, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 4.826149665716156e-06, 'l2_factor': 0.00022692453063343488, 'lrelu': 0.1, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 150, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0028143823525120

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▃▄▃▂▃▂▃▃▃▆▄▆▅▅▃▁▆▆▇▇▅█▃▄▄▆▃▅▅▃▂▃▅▃▃▃▄
Duration/Mean_val_ep_duration,▁▂▄▂▃▃▂▃▅▆▇▇▆▅▇▆▆▇▇██▅▇▆▆▆▅▅▇▆▅▅▄▃▂▃▃▃▂▄
Learning_rate/Actor,██▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▂▂▃▃▂▂▃▃▃▃▃▃▃▄▃▄▇▅▃▃▄▅▄▄▄▄█▃▄▄▄▃▅▅▄▄▆█▄
Loss/Critic_loss,█▃▃▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▂▂▂▂▂▂
Loss/Entropy_bonus,█▇▆▅▅▄▃▂▂▁▂▂▂▁▂▁▂▁▁▁▂▂▂▂▂▂▁▁▂▂▂▂▂▃▁▂▂▂▁▂
Loss/KL_divergence,▅▅▆▅▅▅▄▅▅▆▄▄▄▄▄▆▃█▅▅▅▅▅▄▅▅▅▇▅▄▅▃▄▃█▄▄▁▄▄
Loss/Policy_loss,▁▂▂▃▃▂▂▃▃▃▃▃▃▃▄▃▄▇▅▃▃▃▅▃▄▄▄█▃▄▄▄▃▄▅▄▄▆█▄
Loss/Regularized_Actor_loss,█▇▇▆▅▄▄▃▃▃▃▂▂▂▃▂▂▄▃▂▁▂▂▁▁▁▂▄▁▂▁▂▁▂▂▂▁▂▄▁

0,1
Duration/Mean_train_ep_duration,94.5
Duration/Mean_val_ep_duration,80.6
Learning_rate/Actor,8e-05
Learning_rate/Critic,0.00022
Loss/Actor_loss,-0.01305
Loss/Critic_loss,7.91894
Loss/Entropy_bonus,1.03445
Loss/KL_divergence,-0.01141
Loss/Policy_loss,-0.00775
Loss/Regularized_Actor_loss,0.13799


[34m[1mwandb[0m: Agent Starting Run: s3hecz7k with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.000681082990242636
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.003009925162185662
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.014326645469037773
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9146549861710976
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 250, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.1992561129112308e-06
[34m[1mwandb[0m: 	l2_factor: 3.0053934446129956e-06
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.000681082990242636, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.003009925162185662, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.014326645469037773, 'epochs': 10, 'exponential_factor': 0.9146549861710976, 'gamma': 0.9, 'hidden_sizes': [250, 350, 250, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.1992561129112308e-06, 'l2_factor': 3.0053934446129956e-06, 'lrelu': 0.1, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 250, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.1, 'bn': False, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.0006810829

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▄▄▄▄▄▅▄▄▅▅▅▅▅▆▇▅▆▆▄▇▆▇██▆▆▆▇▇▆▇▆▇█▅█▇█
Duration/Mean_val_ep_duration,▁▃▄▄▃▄▄▅▅▅▄▅▅▅▅▆▆██▇▇▇▅▅▆▇▇▇▇▇▆▇▇▅▇▇▆▆▅█
Learning_rate/Actor,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,██▆▄▄▃▄▄▄▃▂▃▃▃▃▃▂▃▂▃▃▃▂▃▁▁▂▂▃▁▂▁▁▃▁▂▃▂▂▂
Loss/Critic_loss,█▇▄▃▃▃▃▂▂▂▂▁▁▂▂▂▂▂▂▃▃▃▂▂▂▁▂▃▃▁▃▂▂▂▂▁▄▁▂▁
Loss/Entropy_bonus,█▆▅▃▃▂▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁
Loss/KL_divergence,▇█▄▁█▃▇▆▅▁▅▅▄▆▄▄▃▃▄▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Loss/Policy_loss,██▆▄▄▃▄▄▄▃▂▃▃▃▃▃▂▃▂▃▃▃▂▃▁▁▂▂▃▁▂▁▁▃▁▂▃▂▂▂
Loss/Regularized_Actor_loss,██▆▄▄▃▄▄▄▃▂▃▃▃▃▃▂▃▂▃▃▃▂▃▁▁▂▂▃▁▂▁▁▃▁▂▃▂▂▂

0,1
Duration/Mean_train_ep_duration,69.16666
Duration/Mean_val_ep_duration,83.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,3.61424
Loss/Critic_loss,6.27801
Loss/Entropy_bonus,1.10262
Loss/KL_divergence,-0.0
Loss/Policy_loss,3.63003
Loss/Regularized_Actor_loss,3.64052


[34m[1mwandb[0m: Agent Starting Run: mvk8r5w8 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0020952538744408483
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00026850623748804345
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.018740638770199724
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9664345870680864
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.939769615978174e-06
[34m[1mwandb[0m: 	l2_factor: 4.962177113467285e-05
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.0020952538744408483, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00026850623748804345, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.018740638770199724, 'epochs': 10, 'exponential_factor': 0.9664345870680864, 'gamma': 0.95, 'hidden_sizes': [250, 350, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 1.939769615978174e-06, 'l2_factor': 4.962177113467285e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 150, 150], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.0020

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▆▅▆▆▇▅▆▄▅▄▄▃▅▅▆▆▆▆▆▆▇▆▇▆▇▇▇▇▇▇██▇▇▇▇▇█
Duration/Mean_val_ep_duration,▁▁▄▅▅▅▇▇▆▅▆▄▃▄▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇██▇██
Learning_rate/Actor,██▇▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃▄▄▁▂▂▆▃▄▄▄▅▄▆▆▆▄▄▅▇▇▇▆▄▇▇▆▆▆▇▆▇█▆▇▆▆▆▇▇
Loss/Critic_loss,█▆▄▃▃▃▂▂▂▄▃▃▂▂▃▃▁▁▂▁▂▁▁▂▁▁▁▁▁▁▁▃▂▄▂▁▁▁▁▂
Loss/Entropy_bonus,██▇▆▅▅▅▄▄▂▂▁▁▂▂▁▂▁▁▁▁▂▂▂▂▂▂▂▁▂▁▁▂▃▂▁▂▂▁▂
Loss/KL_divergence,▃▄▅▄▃▂▇▄▅▇▄▄▁▃▄▄▃▂▄▃█▃▂▄▂▂▃▃▃▃▂▃▃▂▃▁▃▄▃▃
Loss/Policy_loss,▄▄▅▁▂▂▆▃▄▃▄▄▄▆▆▅▄▃▅▆▆▆▆▄▇▇▅▅▆▇▆▆█▅▆▆▅▅▆▇
Loss/Regularized_Actor_loss,█▆▅▁▂▂▆▃▄▃▄▅▄▆▆▅▄▃▅▆▇▆▆▃▇▇▅▅▆▇▆▆█▅▆▆▅▅▆▆

0,1
Duration/Mean_train_ep_duration,109.0
Duration/Mean_val_ep_duration,106.9
Learning_rate/Actor,1e-05
Learning_rate/Critic,0.0
Loss/Actor_loss,0.00259
Loss/Critic_loss,2.83182
Loss/Entropy_bonus,0.65084
Loss/KL_divergence,-0.00489
Loss/Policy_loss,0.01479
Loss/Regularized_Actor_loss,0.02897


[34m[1mwandb[0m: Agent Starting Run: q1x3t79p with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0007536588516135134
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00011416866183023788
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0023705323273667737
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9481002044596172
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0002908570561182219
[34m[1mwandb[0m: 	l2_factor: 2.821425929056515e-05
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0007536588516135134, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00011416866183023788, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0023705323273667737, 'epochs': 10, 'exponential_factor': 0.9481002044596172, 'gamma': 0.95, 'hidden_sizes': [350, 350, 150, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0002908570561182219, 'l2_factor': 2.821425929056515e-05, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.0007536

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▅▄▄▅▄▅▆▅▇▇▇██▆█▆█████▇▇▇▇▇▇▇▇▇▇██████▇█
Duration/Mean_val_ep_duration,▁▃▄▅▃▃▆▅▅█▆▇█▇▅▆▇▆█▅▆▇▆▇▆▆▇▇▆▇▇▅▇▇▇▇▇▇▇▆
Learning_rate/Actor,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▃▅▃▄▄▅▃▃▃▂▂▂▂▂▃▂▂▃▂▁▁▂▁▂▂▂▂▂▂▁▂▃▁▁▁▂▂▂▂
Loss/Critic_loss,█▃▃▂▃▂▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▂▁▂▂▁▁▁▂▂▁▁▁▁▁▂▁
Loss/Entropy_bonus,██▇▇▆▅▅▅▄▄▄▃▃▃▂▃▂▂▂▂▂▂▂▁▂▂▂▁▁▂▁▁▂▁▂▁▁▁▁▁
Loss/KL_divergence,▃▅▃▃█▄▂▂▂▄▆▄▃▂▁▄▅▃▃▃▂▂▃▃▂▂▃▃▂▃▄▃▁▃▃▃▃▃▃▃
Loss/Policy_loss,█▃▅▃▄▄▅▃▃▃▂▂▂▂▂▃▂▂▃▂▁▁▂▁▂▂▂▂▂▂▁▂▃▁▁▁▂▂▂▂
Loss/Regularized_Actor_loss,█▃▅▃▄▄▅▃▃▃▂▂▂▂▂▃▂▂▂▂▁▁▂▁▂▂▂▂▂▂▁▂▃▁▁▁▂▂▂▂

0,1
Duration/Mean_train_ep_duration,97.33334
Duration/Mean_val_ep_duration,89.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,1.67226
Loss/Critic_loss,2.10952
Loss/Entropy_bonus,0.37453
Loss/KL_divergence,0.00095
Loss/Policy_loss,1.67315
Loss/Regularized_Actor_loss,4.36373


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s088ykix with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0002342203571672758
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0007911408378897133
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0038653135491394943
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8541893023523218
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 150, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.0002342203571672758, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0007911408378897133, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0038653135491394943, 'epochs': 10, 'exponential_factor': 0.8541893023523218, 'gamma': 0.95, 'hidden_sizes': [250, 150, 150, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.00020194282927196984, 'l2_factor': 0.00013826434174276167, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 150, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'acto

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▃▃▄▅▄▅▄▆▄▅▆▅▅▅▄▅▅▇▆▇▅▅▄▅▆▅▄▇▅▅▄▇▆█▇▅
Duration/Mean_val_ep_duration,▁▂▃▄▅▄▅▃▆▆▆▆▇▇▇█▅▇▅█▇▆▇▇▇▆▆█▅▄▅█▇▆▇▅█▅▅▇
Learning_rate/Actor,█▆▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▇▅▅▄▃▃▂▂▃▂▂▃▁▂▂▃▃▃▂▂▁▂▂▂▃▂▂▂▃▃▁▃▂▂▂▂▃▂▃
Loss/Critic_loss,█▅▅▄▂▂▂▂▂▃▂▂▂▁▂▁▂▃▂▂▁▁▁▂▃▂▂▂▁▃▂▁▃▂▃▂▂▃▂▂
Loss/Entropy_bonus,▇█▆▆▅▅▅▃▄▂▁▃▃▂▃▄▃▂▂▂▃▄▂▂▃▂▅▃▃▄▂▄▂▁▄▃▃▃▂▄
Loss/KL_divergence,█▅▄▂▂▁▂▃▁▄▃▃▂▂▃▂▂▂▂▂▄▁▂▂▁▂▁▂▂▂▃▃▂▂▃▃▂▃▄▁
Loss/Policy_loss,█▇▅▅▄▃▃▂▂▃▂▂▃▁▂▂▃▃▃▂▂▁▂▂▂▃▂▂▂▃▃▁▃▂▂▂▂▃▂▃
Loss/Regularized_Actor_loss,█▇▅▅▄▃▃▂▂▃▂▂▃▁▂▂▃▃▃▂▂▁▂▂▂▃▂▂▂▃▃▁▃▂▂▂▂▃▂▃

0,1
Duration/Mean_train_ep_duration,101.33334
Duration/Mean_val_ep_duration,141.60001
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,5.67672
Loss/Critic_loss,6.96334
Loss/Entropy_bonus,1.54523
Loss/KL_divergence,-0.01396
Loss/Policy_loss,5.68269
Loss/Regularized_Actor_loss,7.25464


[34m[1mwandb[0m: Agent Starting Run: lz3ykfiw with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.007558301798269005
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00011720762159629845
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0474153757740874
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9531290576125508
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 350, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.6811209941228222e-05
[34m[1mwandb[0m: 	l2_factor: 3.444968840334554e-05
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.007558301798269005, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00011720762159629845, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0474153757740874, 'epochs': 10, 'exponential_factor': 0.9531290576125508, 'gamma': 0.99, 'hidden_sizes': [350, 150, 350, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 1.6811209941228222e-05, 'l2_factor': 3.444968840334554e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 350, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.007558

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▃▃▃▅▃▅▄▇▅▆▆▆▆▆▆▆▇▆▆▆▇▆█▆▇▆█▅▆▆▅▅▆▆▇▆▆
Duration/Mean_val_ep_duration,▁▂▂▄▄▄▄▅▄▆▅▆▅▅▆▇▇█▇▇▇▇█▇▆▇█▇▇█▆▆▅▆▇▇▇▇▇▆
Learning_rate/Actor,██▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▁▃▁▃▂▂▂█▂▇▃▁▂▂▂▂▂▃▃▃▂▃▂▂▃▃▂▂▂▂▄▃▁▂▂▃▂▂▂
Loss/Critic_loss,█▄▅▃▃▃▃▄▂▃▂▂▂▂▂▁▂▁▂▁▁▁▂▁▁▁▂▂▁▁▂▁▁▂▁▁▂▁▂▂
Loss/Entropy_bonus,█▅▃▂▂▂▃▁▂▃▃▄▃▃▃▄▄▅▄▄▅▅▄▅▄▄▄▅▅▅▄▄▅▅▅▅▄▅▅▄
Loss/KL_divergence,▅▁▇▄▆▆▁▇▆▂▇▆█▅▆▅▆▃▅█▅▄▆▅▄▆▆▅▆▆▅▅▆▆▇▅▆▇▅█
Loss/Policy_loss,▃▁▃▁▂▂▂▂█▂▇▃▁▂▂▂▂▂▃▃▃▃▃▂▂▃▃▂▂▂▂▄▃▁▂▂▃▂▂▂
Loss/Regularized_Actor_loss,▅▂▄▂▄▃▂▃█▃█▄▂▃▃▂▃▂▃▃▃▂▃▂▂▃▂▂▂▁▂▃▂▁▁▂▃▂▂▂

0,1
Duration/Mean_train_ep_duration,141.60001
Duration/Mean_val_ep_duration,136.5
Learning_rate/Actor,0.00023
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.0596
Loss/Critic_loss,6.9305
Loss/Entropy_bonus,1.33119
Loss/KL_divergence,0.03692
Loss/Policy_loss,0.00352
Loss/Regularized_Actor_loss,0.00947


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9zbkshi1 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0029818242878904564
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.001691971447829929
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.009824024358290466
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.939008268962999
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 150, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 7.2048078655732

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.0029818242878904564, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.001691971447829929, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.009824024358290466, 'epochs': 10, 'exponential_factor': 0.939008268962999, 'gamma': 0.99, 'hidden_sizes': [350, 250, 150, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 7.2048078655732085e-06, 'l2_factor': 1.6795609093063923e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 150, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.0029818242878

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▄▃▄▄▄▃▅▄▆█▆▆▅▇█▇▆▅▅▆▆▆▅▇▆▆▆▆▆▆▅▅▆▆▆▆▆▆
Duration/Mean_val_ep_duration,▁▂▂▅▄▃▄▄▃▄▆▇▇▅▇▆█▆▆▅▆▅▄▇▆▆▇▇▇▆▅▆▆▆▆▆▆▆▆▆
Learning_rate/Actor,█▇▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▅▅▃▃▂▂▃▂▆▃▄█▁▂▅▃▃▅▃▃▅▃▅▄▂▄▅▆▅▄▅▄▆▅▆▃▃▃▅
Loss/Critic_loss,█▄▆▃▃▃▃▃▃▃▃▂▂▄▂▂▂▁▂▃▃▁▂▂▂▁▁▁▂▁▁▁▂▂▂▂▂▂▂▁
Loss/Entropy_bonus,█▇▇▅▄▃▃▄▄▃▂▂▂▃▂▃▂▃▃▂▂▂▂▂▂▂▃▂▃▁▂▁▁▂▂▁▁▂▁▂
Loss/KL_divergence,▅▇▆▄▄▆▃▅▄▇▃▁▄▄▄▆▁▄▅▄▅▆▄█▄▄▆▄▅▃▄▄▅▄▆▄▄▄▄▆
Loss/Policy_loss,▂▅▅▃▃▂▂▃▂▆▃▄█▁▂▅▃▃▅▃▃▅▃▅▄▂▄▅▆▅▄▅▄▆▅▆▃▃▃▅
Loss/Regularized_Actor_loss,▄▆▆▄▄▃▃▃▂▆▃▅█▁▂▅▃▃▅▃▂▅▃▅▃▂▄▄▆▅▄▅▄▅▅▆▃▃▃▅

0,1
Duration/Mean_train_ep_duration,122.0
Duration/Mean_val_ep_duration,114.6
Learning_rate/Actor,1e-05
Learning_rate/Critic,0.0
Loss/Actor_loss,0.00379
Loss/Critic_loss,2.33869
Loss/Entropy_bonus,0.79358
Loss/KL_divergence,0.02165
Loss/Policy_loss,0.01159
Loss/Regularized_Actor_loss,0.04667


[34m[1mwandb[0m: Agent Starting Run: r7rznh59 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.001196962607264042
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00198745920284241
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.024886959094987
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8634911459470309
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 150, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00044071834483726166
[34m[1mwandb[0m: 	l2_factor: 2.69646074087783e-06
[34m[1mwandb[0m: 	lr

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.001196962607264042, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00198745920284241, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.024886959094987, 'epochs': 10, 'exponential_factor': 0.8634911459470309, 'gamma': 0.99, 'hidden_sizes': [250, 150, 150, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.00044071834483726166, 'l2_factor': 2.69646074087783e-06, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 150, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0011969626072

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃██████████████████████████████████████
Duration/Mean_val_ep_duration,▁▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
Learning_rate/Actor,█▆▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▁█▁▁▁▂▁▂▁▂▁▂▁▂▂▁▁▂▁▁▁▁▂▁▁▁▂▂▁▂▂▆▁▁▁▁▂▁▁
Loss/Critic_loss,█▂▃▄▂▂▂▃▂▂▂▂▁▂▂▂▂▂▂▂▂▁▁▂▁▂▁▂▂▁▁▁▂▁▂▂▂▂▂▂
Loss/Entropy_bonus,█▂▂▁▂▂▁▁▂▃▂▂▂▄▂▃▃▂▃▃▂▂▄▂▂▂▂▂▂▂▂▂▄▂▃▃▃▁▂▃
Loss/KL_divergence,█▁▄▃▃▄▃▃▅▃▄▃▄▁▃▃▃▃▃▁▂▃▁▅▂▃▃▄▄▃▃▄▃▃▅▁▂▃▃▃
Loss/Policy_loss,▂▁█▁▁▁▂▁▂▁▂▁▂▁▂▂▁▁▂▁▁▁▁▂▁▁▁▂▂▁▂▂▆▁▁▁▁▂▁▁
Loss/Regularized_Actor_loss,▇▃█▂▂▁▂▁▂▁▂▁▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▂▂▁▂▂▅▁▁▁▁▂▁▁

0,1
Duration/Mean_train_ep_duration,127.0
Duration/Mean_val_ep_duration,125.6
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.02427
Loss/Critic_loss,7.76948
Loss/Entropy_bonus,0.14022
Loss/KL_divergence,0.05537
Loss/Policy_loss,-0.02078
Loss/Regularized_Actor_loss,1.50647


[34m[1mwandb[0m: Agent Starting Run: egg2fsuq with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.009088186882336351
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0017531204231651775
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.007284770146609111
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.969499150142167
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 8.277916768734815e-05
[34m[1mwandb[0m: 	l2_factor: 5.378107398897753e-05
[34m[1mwandb[0m: 	lrelu

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.009088186882336351, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0017531204231651775, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.007284770146609111, 'epochs': 10, 'exponential_factor': 0.969499150142167, 'gamma': 0.9, 'hidden_sizes': [150, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 8.277916768734815e-05, 'l2_factor': 5.378107398897753e-05, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 150, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': True, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.009088186882336351, 'adv

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▄▄▄▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▄▄▅▄▄▄▄▅▄▄▄▄▄▅█▅▄▄▄
Duration/Mean_val_ep_duration,▁▁▅▄▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▇▅▄▇█▅▄▄▅▅▅▅▆▆▅
Learning_rate/Actor,██▇▆▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▅▅▁▃▃▁▄▇▁▁▄▂▃▃▁▃▁▁▁▂▄▅▃█▃▃▄▃▁▁▃▃▃▁▂▃▃▃▂▃
Loss/Critic_loss,█▆▁▃▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▂▁▂▂▁▂▁▂▁▁▁▂▂▁▂▁▁▁▁▁▁
Loss/Entropy_bonus,█▄▆▄▅▄▃▂▂▂▂▁▂▁▁▁▂▁▂▃▂▂▃▃▃▂▃▂▁▁▂▂▂▁▂▂▂▂▁▁
Loss/KL_divergence,▆▅▄▅▆▂▅▅▃▅▅▄▃█▂▁▂▇▂█▃▂▁▂▂▃▅▁▂▁▃▂▂▂▂▂▃▁▅▂
Loss/Policy_loss,▅▅▁▃▃▁▄▇▁▁▄▂▃▃▁▃▁▁▁▂▄▅▃█▃▃▄▃▁▁▃▃▃▁▂▃▃▃▂▃
Loss/Regularized_Actor_loss,██▃▄▃▂▃▅▂▂▃▂▂▂▁▂▁▁▁▂▂▃▂▄▂▂▃▂▁▁▂▂▂▁▁▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,129.5
Duration/Mean_val_ep_duration,169.89999
Learning_rate/Actor,0.00025
Learning_rate/Critic,5e-05
Loss/Actor_loss,0.01223
Loss/Critic_loss,2.11799
Loss/Entropy_bonus,0.32314
Loss/KL_divergence,-0.01129
Loss/Policy_loss,0.01459
Loss/Regularized_Actor_loss,0.03942


[34m[1mwandb[0m: Agent Starting Run: badcdinu with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.007930857175333586
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004150315798785521
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.004563162421066501
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9264410540692616
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 150, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001845535175972598
[34m[1mwandb[0m: 	l2_factor: 2.056654905232913e-06
[34m[1mwandb[0m: 	lr

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.007930857175333586, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004150315798785521, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.004563162421066501, 'epochs': 10, 'exponential_factor': 0.9264410540692616, 'gamma': 0.95, 'hidden_sizes': [150, 150, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0001845535175972598, 'l2_factor': 2.056654905232913e-06, 'lrelu': 0.1, 'minibatch_size': 128, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 150, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.007930857175333586, 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▃██████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄███
Duration/Mean_val_ep_duration,███████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄████
Learning_rate/Actor,█▇▇▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▇▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▂▁▁▂▂▁▂▁▁▁▁▂▁▂▁▁▂▁████████████████▆▄▂▂▂
Loss/Critic_loss,▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆█▇▅
Loss/Entropy_bonus,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁
Loss/KL_divergence,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Policy_loss,▁▂▁▁▂▂▁▂▁▁▁▁▂▁▂▁▁▂▁████████████████▆▄▂▂▂
Loss/Regularized_Actor_loss,▁▂▁▁▂▁▁▁▁▁▁▁▂▁▂▁▁▂▁████████████████▆▃▂▂▂

0,1
Duration/Mean_train_ep_duration,36.0
Duration/Mean_val_ep_duration,36.0
Learning_rate/Actor,0.00013
Learning_rate/Critic,1e-05
Loss/Actor_loss,29.86222
Loss/Critic_loss,23.04866
Loss/Entropy_bonus,2e-05
Loss/KL_divergence,-0.0
Loss/Policy_loss,29.86222
Loss/Regularized_Actor_loss,29.93961


[34m[1mwandb[0m: Agent Starting Run: afg97tih with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00042233164420571456
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00015680527449756009
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.00385734538425536
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8947396766648134
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 150, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001825763949892606
[34m[1mwandb[0m: 	l2_factor: 4.7592011607222075e-05
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.00042233164420571456, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00015680527449756009, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.00385734538425536, 'epochs': 10, 'exponential_factor': 0.8947396766648134, 'gamma': 0.9, 'hidden_sizes': [150, 250, 150, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0001825763949892606, 'l2_factor': 4.7592011607222075e-05, 'lrelu': 0.1, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 150, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.00042

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▃▃▃▂▄▃▃▃▄▃▃▄▁▄▂▅▄▇▃▅▆▄▅▄▄▂▃▂▆▅▃▆██
Duration/Mean_val_ep_duration,▃▃▅▄▃▆▆▃▄▂▃▂▁▆▅▆▂▆▂▇▄▇█▄▂▄▃▃▄▃▅▅▆▄▄▂▃
Learning_rate/Actor,█▇▇▆▅▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▇▆▅▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▇▄▇▃▆▃█▅▃▃▄▅▂▅▃▄▄▁▁▅▂▂▁▂▁▃▃▂▁▄▂▃▂▂▃▃▂
Loss/Critic_loss,█▆▄▄▂▂▂▂▃▂▂▂▂▂▁▂▂▂▁▁▁▂▁▁▂▂▁▁▁▂▂▁▁▁▁▁▂
Loss/Entropy_bonus,▄▁▂▃▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████
Loss/KL_divergence,█▅▅▃▅▄▄▂▂▂▁▂▁▂▂▁▂▂▂▂▂▂▂▂▁▂▁▂▁▁▂▂▁▁▁▂▂
Loss/Policy_loss,▇▄▇▃▅▃█▅▃▃▄▅▂▅▃▄▄▁▁▅▂▂▁▂▁▃▃▂▁▄▂▃▂▂▃▃▂
Loss/Regularized_Actor_loss,█▅█▄▆▄█▅▃▃▄▅▂▅▃▄▄▁▁▄▂▂▁▂▁▂▂▂▁▃▂▃▁▁▂▂▂

0,1
Duration/Mean_train_ep_duration,70.0
Duration/Mean_val_ep_duration,38.5
Learning_rate/Actor,1e-05
Learning_rate/Critic,0.0
Loss/Actor_loss,0.02616
Loss/Critic_loss,30.49477
Loss/Entropy_bonus,1.62987
Loss/KL_divergence,0.01645
Loss/Policy_loss,0.03245
Loss/Regularized_Actor_loss,1.66927


[34m[1mwandb[0m: Agent Starting Run: cidh2ccc with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0006644086270589773
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.007553606477719556
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0316569146656629
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8529997061380378
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 7.500663420912767e-05
[34m[1mwandb[0m: 	l2_factor: 0.0004437218844894752
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0006644086270589773, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.007553606477719556, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0316569146656629, 'epochs': 10, 'exponential_factor': 0.8529997061380378, 'gamma': 0.95, 'hidden_sizes': [350, 350, 150, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 7.500663420912767e-05, 'l2_factor': 0.0004437218844894752, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▄▃▅▅▄▄▃▆▄▄▄▅▆▄▄▄▅▇▄▄▄▇▄▅▆▇▅█▄▅▄▄▄▆▄▄▄▆
Duration/Mean_val_ep_duration,▁▁▅▅▅▅▆█▅▅▆▆█▇▆▅▅▆▆▅▅▇▆▇▅▅▇▅▅▆▆▄▆▆▆▅▆▆▆▆
Learning_rate/Actor,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▁▂▂▂▄▃▃▃▂▂▂▂▂▁▂▃▂▂▂▂▂▃▃▁▂▃▂▃▁▂▁▄▂▃▃▁▃▁▁
Loss/Critic_loss,█▂▃▂▄▂▂▂▃▄▁▂▂▁▁▁▂▂▂▂▃▁▃▃▁▃▁▃▂▁▃▄▂▁▂▃▃▃▃▁
Loss/Entropy_bonus,█▆▅▂▁▃▃▃▄▃▂▂▂▂▂▁▂▁▂▂▂▁▂▃▁▂▃▂▂▃▂▂▁▂▃▂▂▂▂▂
Loss/KL_divergence,▄▅▄▇▇▆▇▁█▄▂▃▄▅▃▂▄▂▄▆▆▄▆▇▄▄▆▁▃▇▃▃▄▅▅▄▅▃█▅
Loss/Policy_loss,█▁▂▂▂▄▃▃▃▂▂▂▂▂▁▂▃▂▂▂▂▂▃▃▁▂▃▂▃▁▂▁▄▂▃▃▁▃▁▁
Loss/Regularized_Actor_loss,█▁▂▂▂▄▃▃▃▂▂▂▂▂▁▂▃▂▂▂▂▂▃▃▁▂▃▂▃▁▂▁▄▂▃▃▁▃▁▁

0,1
Duration/Mean_train_ep_duration,80.5
Duration/Mean_val_ep_duration,65.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,5.62489
Loss/Critic_loss,3.7367
Loss/Entropy_bonus,1.52482
Loss/KL_divergence,0.00215
Loss/Policy_loss,5.67317
Loss/Regularized_Actor_loss,7.83989


[34m[1mwandb[0m: Agent Starting Run: shxgbxpq with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.001092251356510318
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.003349115256717329
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.02976599424830743
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8779820387389247
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 250, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.4447741988652544e-05
[34m[1mwandb[0m: 	l2_factor: 0.00019134493791455492
[34m[1mwandb[0m

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.001092251356510318, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.003349115256717329, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.02976599424830743, 'epochs': 10, 'exponential_factor': 0.8779820387389247, 'gamma': 0.99, 'hidden_sizes': [350, 350, 250, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 3.4447741988652544e-05, 'l2_factor': 0.00019134493791455492, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 250, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00109225135

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▄▃▄█▅▂▁▂▄▆▅▆▅▄▅▅▄▇▅▆▅▅▄▃▇▇▆▅▅▅▅▆▅▆▆▅██▅▇
Duration/Mean_val_ep_duration,▁▆▃▂█▄▆▆▆▅▆▅▆▅▆▆▆▅▇▆▅▆▇▅▆▇▆▅▆▅▆▅▆▄▆▆▆▅▆▅
Learning_rate/Actor,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▁▄▃▃▃█▄▄▃▅▆▄▄▄▃▄▂▂▄▃▄▂▂▃▆▄▃▄▄▃▃▃▃▂▃▄▆▂▄
Loss/Critic_loss,▆▄▁▆▅▄█▄▃▃▂▅▂▂▃▂▅▃▂▃▃▄▂▂▄▃▄▂▁▃▁▃▂▃▃▄▃▂▄▅
Loss/Entropy_bonus,█▇▄▄▅▆▆▅▂▂▂▂▂▄▃▃▃▄▃▃▁▁▂▂▁▄▁▁▂▂▂▁▂▂▃▄▂▄▂▃
Loss/KL_divergence,▅▃▃██▂█▆▄▃▆▃▃▃▆▃▂▅▆▄▆▇▃▄█▄▆▄▅█▃▃▁▄▃▂▆▄▅▇
Loss/Policy_loss,▂▁▄▃▃▃█▄▄▃▅▆▄▄▄▃▄▂▂▄▃▄▂▂▃▆▄▃▄▄▃▃▃▃▂▃▄▆▂▄
Loss/Regularized_Actor_loss,▂▁▄▃▃▃█▄▄▃▅▆▄▄▄▃▄▂▂▄▃▄▂▂▃▆▄▃▄▄▃▃▃▃▂▃▄▆▂▄

0,1
Duration/Mean_train_ep_duration,80.0
Duration/Mean_val_ep_duration,59.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,25.75761
Loss/Critic_loss,11.70727
Loss/Entropy_bonus,1.23926
Loss/KL_divergence,0.03376
Loss/Policy_loss,25.79449
Loss/Regularized_Actor_loss,27.12183


[34m[1mwandb[0m: Agent Starting Run: vcqkp026 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0030913991713651533
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0009607361440878124
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.023202226276275342
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9183952723226634
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 150, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 5.092305459543365e-05
[34m[1mwandb[0m: 	l2_factor: 6.173114148384762e-06
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0030913991713651533, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0009607361440878124, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.023202226276275342, 'epochs': 10, 'exponential_factor': 0.9183952723226634, 'gamma': 0.99, 'hidden_sizes': [150, 350, 150, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 5.092305459543365e-05, 'l2_factor': 6.173114148384762e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 150, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.003091

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▄▁▄▆█▇▅▆▅▅▆▅▄▆▆▆▅▆▆▅▅▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Duration/Mean_val_ep_duration,▁▃▄▅█▃▅▅▅▃▃▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▃▃
Learning_rate/Actor,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▄▂▅▂▁▃▅▂▄▄▄▄▅▇▄▆▄▅▄▄▅▃▄▆▅▄▆▇▅▆▆▆▄█▅▇▅▇▇▄
Loss/Critic_loss,▅▃█▄▄▅▅▃▆▇▄▅▅▇▂▅▃▄▂▁▃▂▂▄▃▂▅▄▃▄▄▄▂▆▂▄▃▄▅▃
Loss/Entropy_bonus,█▇▆▆▇▆▅▅▄▃▃▃▂▃▂▂▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▂▁▁▂▂▁▂
Loss/KL_divergence,▆▆▇█▅▆▇█▆▁▆▃▆▅▄▆▄▃▃▄▂▃▄▅▃▃▂▆▄▄▄▅▃▆▄▆▂▄▅▁
Loss/Policy_loss,▄▂▅▂▁▃▅▂▄▄▄▄▅▇▄▆▄▅▄▄▅▃▄▆▅▄▆▇▅▆▆▆▄█▅▇▅▇▇▄
Loss/Regularized_Actor_loss,▄▂▅▂▁▃▅▂▄▄▄▄▅▇▄▆▄▅▄▄▅▃▄▆▅▄▆▇▅▆▆▆▄█▅▇▅▇▇▄

0,1
Duration/Mean_train_ep_duration,54.33333
Duration/Mean_val_ep_duration,51.9
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,13.93889
Loss/Critic_loss,12.80281
Loss/Entropy_bonus,0.60491
Loss/KL_divergence,-0.04137
Loss/Policy_loss,13.95292
Loss/Regularized_Actor_loss,14.58829


[34m[1mwandb[0m: Agent Starting Run: t578xlj3 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0008504388110560009
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0017274736131631004
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.009630122685964766
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8615059698247903
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 250, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 6.595268777844997e-06
[34m[1mwandb[0m: 	l2_factor: 7.583700308588196e-05
[34m[1mwand

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0008504388110560009, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0017274736131631004, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.009630122685964766, 'epochs': 10, 'exponential_factor': 0.8615059698247903, 'gamma': 0.95, 'hidden_sizes': [150, 250, 250, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 6.595268777844997e-06, 'l2_factor': 7.583700308588196e-05, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 250, 150], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▅▄▄▄▃▄▄▆▅▆▅▆▆▆▆▅▆▅█▅▆▆▇▆▆▅▆▆▆▆▆▆▆▆▅▅▄▆
Duration/Mean_val_ep_duration,▁▄▂▄▃▄▅▅█▆▇▇▆▇▆▆▆▅▆▇▇▇▇▆▅█▆▆▅▆▆▆▅▇▆▅█▆▆▇
Learning_rate/Actor,█▆▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆▅██▇▇▅▄▁▃▄▄▃▃▅▂▃▅▄▅▁▅▂▄▁▄▄▂▄▃▁▅▄▄▃▃▃▄▅▃
Loss/Critic_loss,█▆▆▅▆▆▅▃▂▃▃▄▃▂▃▁▃▃▃▃▂▃▂▂▂▂▃▁▂▂▂▃▃▂▂▂▃▂▄▂
Loss/Entropy_bonus,█▇▆▆▅▄▃▃▃▃▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,▅▅▅▂▂▆█▃▁▇▄▃▅▃▃▂▃▃▃▂▃▂▄▄▂▃▂▁▂▂▁▄▃▃▂▃▂▃▂▂
Loss/Policy_loss,▆▅██▇▇▅▄▁▃▄▄▃▃▅▂▃▅▄▅▁▅▂▄▁▄▄▂▄▃▁▅▄▄▃▃▃▄▅▃
Loss/Regularized_Actor_loss,▆▅██▇▇▅▄▁▃▄▄▃▃▅▂▃▅▄▅▁▅▂▄▁▄▄▂▄▃▁▅▄▄▃▃▃▄▅▃

0,1
Duration/Mean_train_ep_duration,63.33333
Duration/Mean_val_ep_duration,66.9
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,7.75061
Loss/Critic_loss,7.75194
Loss/Entropy_bonus,1.109
Loss/KL_divergence,-0.0101
Loss/Policy_loss,7.76129
Loss/Regularized_Actor_loss,7.90148


[34m[1mwandb[0m: Agent Starting Run: 8vfoy2ai with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0006364036308445669
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00038795282076171126
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.00026168117600784815
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9242611684482492
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.0649529336677958e-05
[34m[1mwandb[0m: 	l2_factor: 1.4858877217694717e-06
[34m[1mwandb[0m

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.0006364036308445669, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00038795282076171126, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.00026168117600784815, 'epochs': 10, 'exponential_factor': 0.9242611684482492, 'gamma': 0.9, 'hidden_sizes': [350, 350, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.0649529336677958e-05, 'l2_factor': 1.4858877217694717e-06, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.00063640

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▄▄▇█▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▅▇▇▇█▇▇█
Duration/Mean_val_ep_duration,▁▃▅▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇▇██▇█████████▇███████
Learning_rate/Actor,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▃▄▃▃▃▅▃▄▄▆▅▅▄▆▅▆▆▅▆▅▇▆▅▆▆▆▄█▆▆▇▇▆▆▆▇█▆▅
Loss/Critic_loss,█▃▃▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▂▁▁▂▁▁▁▁
Loss/Entropy_bonus,█▇▇▆▅▄▃▃▃▂▂▁▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,▄▅▄▇▆▆█▄▃▂▄▃▂▂▃▁▂▂▂▂▃▂▂▂▃▂▂▂▃▁▂▂▂▂▂▁▃▂▃▂
Loss/Policy_loss,▁▃▄▃▃▃▅▃▄▄▆▅▅▄▆▅▆▆▅▆▅▇▆▅▆▆▆▄█▆▆▇▇▆▆▆▇█▆▅
Loss/Regularized_Actor_loss,▁▃▄▂▂▂▅▂▃▃▅▄▅▃▅▅▅▅▄▅▅▇▆▄▅▅▆▂█▅▅▆▆▅▆▅▆█▅▄

0,1
Duration/Mean_train_ep_duration,137.0
Duration/Mean_val_ep_duration,128.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.00833
Loss/Critic_loss,1.68618
Loss/Entropy_bonus,0.42922
Loss/KL_divergence,-0.00862
Loss/Policy_loss,-0.00821
Loss/Regularized_Actor_loss,0.11745


[34m[1mwandb[0m: Agent Starting Run: t2drmuwi with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.004615122567006594
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0010286915583778942
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.015364899753243365
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9892590384753888
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0006194302505217875
[34m[1mwandb[0m: 	l2_factor: 0.0003090031218212567
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.004615122567006594, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0010286915583778942, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.015364899753243365, 'epochs': 10, 'exponential_factor': 0.9892590384753888, 'gamma': 0.95, 'hidden_sizes': [350, 350, 150, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0006194302505217875, 'l2_factor': 0.0003090031218212567, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 250], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.004615

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▂▂▁▁▂▂▂▂▃▂▃▃▃▅▅▅▅▅▅▅▅▆▅▆▆▆▆▇▆▇█▆▅▅▅▆▆▆▆
Duration/Mean_val_ep_duration,▃▃▁▁▁▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▇▆▇▆▆▇▇▇███▇▇▇▇▆▇▇▇
Learning_rate/Actor,██▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
Loss/Actor_loss,▂▂▄▅█▄▃▃▄▃▃▃▃▂▂▁▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Critic_loss,▂▂▄▄█▂▃▂▁▂▁▁▁▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▅▆▂▂▂▂▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Loss/KL_divergence,▅▄▁▆▂▅▅▆█▆▆▆▆▅▅▆▇█▇▆▅▅▅▅▅▆▅▅▅▅▆▄▄▆▆▃▄▄▆▃
Loss/Policy_loss,▂▂▄▅█▄▃▃▄▃▃▃▃▂▂▁▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Regularized_Actor_loss,▃▂▄▅█▅▃▃▅▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,145.5
Duration/Mean_val_ep_duration,148.39999
Learning_rate/Actor,0.00138
Learning_rate/Critic,0.00031
Loss/Actor_loss,-2.51786
Loss/Critic_loss,2.49072
Loss/Entropy_bonus,0.45368
Loss/KL_divergence,-0.02709
Loss/Policy_loss,-2.51089
Loss/Regularized_Actor_loss,3.65584


[34m[1mwandb[0m: Agent Starting Run: 72a9thc8 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.001073992188263431
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004644310990877041
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04832774527898613
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9364983238154204
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0006296698509568597
[34m[1mwandb[0m: 	l2_factor: 7.950330435768783e-05
[34m[1mwandb[0m: 

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.001073992188263431, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004644310990877041, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04832774527898613, 'epochs': 10, 'exponential_factor': 0.9364983238154204, 'gamma': 0.99, 'hidden_sizes': [150, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.0006296698509568597, 'l2_factor': 7.950330435768783e-05, 'lrelu': 0.001, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.00107399

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▂▃▃▃▃▄▃▇▃▄▄▅▅▅▆▃▄▆█▃▃▅█▅▆▆▇▆▆▄█▇▅▅▅▄
Duration/Mean_val_ep_duration,▁▂▁▁▂▃▃▃▂▃▅▄▄▅▃▅▃▄▃▅▆▅▇▆▅▆▆▇▆▆█▅▆▅▆▅▄▅▆▃
Learning_rate/Actor,█▇▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▄▅▄▄▁▂▂▁▃▁▃▂▆▅▂▁▃▃▃▁▃▃▄▃▄▃▃▂▅▄▃▄▄▅▅▅▅█▄
Loss/Critic_loss,█▅▄▄▂▃▅▃▃▄▄▃▄▃▃▂▂▂▂▂▂▃▁▃▂▂▂▂▃▁▂▁▁▂▃▃▂▂▂▄
Loss/Entropy_bonus,█▆▄▄▆▃▁▃▅▄▃▅▄▅▄▄▄▄▄▃▃▄▄▃▃▃▃▄▄▄▃▄▄▄▃▄▄▄▄▃
Loss/KL_divergence,▄▇█▅▄▄▄▁▃▅▂▄▆▁▅▃▁▇▃▂▇▄▄▄▄▃▅▇▄▄▃▆▁▄▄▃▄▃▄▃
Loss/Policy_loss,▂▄▅▄▄▁▂▂▁▂▁▄▂▆▅▂▁▃▃▃▁▃▃▄▃▄▃▃▂▅▄▃▄▄▅▅▅▅█▄
Loss/Regularized_Actor_loss,█▇▅▄▄▃▃▂▂▂▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▂▁

0,1
Duration/Mean_train_ep_duration,295.66666
Duration/Mean_val_ep_duration,233.7
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.07512
Loss/Critic_loss,9.52931
Loss/Entropy_bonus,1.70513
Loss/KL_divergence,0.00062
Loss/Policy_loss,0.00729
Loss/Regularized_Actor_loss,0.01188


[34m[1mwandb[0m: Agent Starting Run: 19uogdlz with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.004302443606632136
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.003397302806480466
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.004417164652072709
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9473384380590676
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 150, 350, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.232689808248475e-06
[34m[1mwandb[0m: 	l2_factor: 0.000836478146930121
[34m[1mwandb[0m: 	

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.004302443606632136, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.003397302806480466, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.004417164652072709, 'epochs': 10, 'exponential_factor': 0.9473384380590676, 'gamma': 0.99, 'hidden_sizes': [150, 150, 350, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 1.232689808248475e-06, 'l2_factor': 0.000836478146930121, 'lrelu': 0.01, 'minibatch_size': 32, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 150, 350, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.004302443606

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▁▂▂▂▁▂▂▂▂▂▂▂▃▄▅▇▆▅█▆▆█▇▇▆█▇█▆▇████▇█▇▇
Duration/Mean_val_ep_duration,▁▂▃▂▃▂▂▂▂▂▂▂▃▃▆▅▆▅▇█▆▇▇▇▇█████████▇██▇▇█
Learning_rate/Actor,█▇▆▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃▂▂▄▃▃▃▄▄▅▃▃▄▂▄▄▁▄▆▅▄▄▃▅▃▃▅▃▃█▆▅▅█▅▆▃▄▃▂
Loss/Critic_loss,█▄▄▅▃▄▂▂▃▂▃▁▃▄▄▅▄▆▃▃▄▄▂▂▂▁▂▂▃▂▁▃▃▂▂▂▂▅▂▂
Loss/Entropy_bonus,█▇▇▇▆▃▂▂▂▂▂▂▃▃▃▂▂▂▂▂▃▁▂▂▂▁▁▁▂▁▁▁▁▁▂▁▁▁▁▁
Loss/KL_divergence,▄▅▄▆▆▆▇█▅▁▃▃▄▅▆▇▃▃▆▆▆▅▃▆▅▆█▅▄▆▆▆▆▆▆▆▅▂▆▁
Loss/Policy_loss,▃▂▃▄▃▃▃▄▄▅▃▃▄▂▄▄▁▄▆▅▄▄▃▅▃▃▅▃▃█▆▅▅█▅▆▃▄▃▂
Loss/Regularized_Actor_loss,▅▂▂▄▃▃▃▄▄▄▃▂▃▂▄▄▁▄▆▅▄▄▃▅▃▃▅▃▃█▆▅▅█▅▆▃▄▃▂

0,1
Duration/Mean_train_ep_duration,120.75
Duration/Mean_val_ep_duration,122.9
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.02042
Loss/Critic_loss,5.26826
Loss/Entropy_bonus,0.71534
Loss/KL_divergence,0.05381
Loss/Policy_loss,0.02358
Loss/Regularized_Actor_loss,0.0339


[34m[1mwandb[0m: Agent Starting Run: rssqc96i with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0002815214076083508
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00026503208922109303
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.02421363547792713
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8869244139870407
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.3193793347284496e-06
[34m[1mwandb[0m: 	l2_factor: 0.00034037292192767816
[34m[1mwa

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0002815214076083508, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00026503208922109303, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.02421363547792713, 'epochs': 10, 'exponential_factor': 0.8869244139870407, 'gamma': 0.99, 'hidden_sizes': [350, 350, 150, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 1.3193793347284496e-06, 'l2_factor': 0.00034037292192767816, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▃▃▅▄▅▅████████▇█▇███▇▇▇▇▇██▇▇▇▇▇▇▇▇██▇
Duration/Mean_val_ep_duration,▁▂▃▄▄▄▅▆█▇███▇▇█▇█████▇█▇█▇███▇▇█▇▇▇▇▇▇█
Learning_rate/Actor,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▅▅▅▅▅▄▂▂▁▁▂▁▂▁▂▂▁▂▂▂▁▁▂▂▁▁▂▂▃▂▂▁▃▂▃▃▂▂▂
Loss/Critic_loss,█▆▃▃▃▂▃▃▂▁▁▁▁▂▁▂▁▂▁▂▁▁▁▁▁▁▁▁▁▃▂▁▁▂▁▂▂▁▁▂
Loss/Entropy_bonus,██▆▆▅▅▅▄▃▃▂▃▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▂▁▂▁▁▂▂▂▂▁▂▂
Loss/KL_divergence,▇▅██▅▅▁█▆▆▅▃▄▅▄▄▃▂▃▃▃▄▃▃▂▄▃▃▄▅▃▂▁▄▁▃▃▃▃▃
Loss/Policy_loss,█▅▅▅▅▅▄▂▂▁▁▂▁▂▁▂▂▁▂▂▂▁▁▂▂▁▁▂▂▃▂▂▁▃▂▃▃▂▂▂
Loss/Regularized_Actor_loss,█▅▅▅▅▅▄▂▂▁▁▂▁▂▁▂▂▁▂▂▂▁▁▂▂▁▁▂▂▃▂▂▁▃▂▃▃▂▂▂

0,1
Duration/Mean_train_ep_duration,117.0
Duration/Mean_val_ep_duration,121.8
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,3.18405
Loss/Critic_loss,4.81624
Loss/Entropy_bonus,0.90379
Loss/KL_divergence,-0.01304
Loss/Policy_loss,3.20593
Loss/Regularized_Actor_loss,3.64118


[34m[1mwandb[0m: Agent Starting Run: 8c030a2c with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.003361993105234902
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.001925392297256925
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03267285320372514
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9017317783193952
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 250, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.0816545197750643e-05
[34m[1mwandb[0m: 	l2_factor: 6.657119750973004e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.003361993105234902, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.001925392297256925, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03267285320372514, 'epochs': 10, 'exponential_factor': 0.9017317783193952, 'gamma': 0.9, 'hidden_sizes': [350, 150, 250, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.0816545197750643e-05, 'l2_factor': 6.657119750973004e-05, 'lrelu': 0.01, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 250, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.00336199

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▁▃▃▃▄▃▃▃▃▅▄▄▅▆█▆▄▅▅▅▇▆▄▄▅▄▄▄▄▅▅▄▅▅▃▅▅
Duration/Mean_val_ep_duration,▁▂▂▂▃▂▄▂▃▂▃▅▃▆▅▆▇██▅█▅▆▆▇▆▅▅▄▄▅▇▅▃▅▅▄▃▄▅
Learning_rate/Actor,█▇▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂█▁▁▂▂▂▁▁▂▁▁▁▁▁▂▁▁▁▁▂▁▁▂▁▂▁▂▃▁▃▁▂▂▂▁▂▁▂▂
Loss/Critic_loss,▄▄▅▇██▅▅▇▂▃▂▃▃▁▂▃▂▂▄▁▅▂▆▃▂▃▂▃▁▂▄▂▂▂▂▁▄▃▄
Loss/Entropy_bonus,█▁▂▄▄▅▅▆▇▆▇▇▇▆▆▆▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
Loss/KL_divergence,▅▅▁█▆▆▃▅▅▁▂▄▅▄▃▄▂▁▂▃▅▃▂▅▃▃▅▃▃▄▂▄▅▃▄▅▃▄▂▄
Loss/Policy_loss,▂█▁▁▂▂▂▁▁▂▁▂▂▁▁▂▁▁▁▁▂▂▁▂▂▂▁▂▃▁▃▁▂▂▂▁▂▁▂▂
Loss/Regularized_Actor_loss,▃█▁▂▃▂▂▁▁▂▁▁▁▁▁▂▁▁▁▁▂▁▁▂▁▂▁▂▃▁▂▁▂▂▂▁▂▁▂▂

0,1
Duration/Mean_train_ep_duration,89.71429
Duration/Mean_val_ep_duration,92.1
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.0171
Loss/Critic_loss,7.29536
Loss/Entropy_bonus,1.41208
Loss/KL_divergence,0.02052
Loss/Policy_loss,0.06323
Loss/Regularized_Actor_loss,0.05242


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vn5vzwr3 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.003696374819121163
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.005283820588164741
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.009656928182571824
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9376372955680602
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 250, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.0407801839

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.003696374819121163, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.005283820588164741, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.009656928182571824, 'epochs': 10, 'exponential_factor': 0.9376372955680602, 'gamma': 0.9, 'hidden_sizes': [350, 350, 250, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 1.0407801839131973e-06, 'l2_factor': 3.4377808513506626e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 250, 150], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.8, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.003696

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▂▃▁▂▇▇▅▄█▆▅█▇▅▄▅▄▅▅▄▃▅▅▃▅▃▄▂▄▄▄▄▅▅▅▃▄▄▄
Duration/Mean_val_ep_duration,▁▂▃▅▆█▆█▆▇▇▇▆▇▇▅▆▆▄▄▄▅▃▅▄▆▄▅▄▅▄▅▄▄▄▄▄▄▄▃
Learning_rate/Actor,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆▆▁▆▃▄▃█▃▄▃▂▇▆▅▄▃▃▄▃▄▃▅▄▂▆▇▅▆▆▄▃▂▄▇▂▄▅▅▄
Loss/Critic_loss,▇▅█▃█▃▃▂▃▃▂▂▄▂▂▁▂▃▂▂▃▂▆▅▃▄▂▃▃▃▄▃▅▃▃▂▂▃▄▃
Loss/Entropy_bonus,█▇▇▆▆▄▄▄▄▃▄▃▃▃▄▃▄▃▃▃▃▃▃▂▂▃▃▃▃▃▁▂▂▁▂▃▁▁▂▁
Loss/KL_divergence,▅▅▄▇▅█▄▆▆▇▆▅▅▅▇▆▃▃▅█▅▅▅▁▃▅▆▆▅▇▅▆▅▆▆▂▅▅▅▆
Loss/Policy_loss,▆▆▁▆▃▄▂█▃▄▃▂▇▆▄▄▃▃▃▃▄▂▄▃▁▅▇▅▆▆▄▂▂▄▇▁▄▄▅▄
Loss/Regularized_Actor_loss,██▃▇▄▅▄█▄▅▃▃▇▆▅▄▃▃▃▃▄▂▄▃▂▄▆▄▅▅▃▂▂▃▅▁▃▄▄▃

0,1
Duration/Mean_train_ep_duration,75.0
Duration/Mean_val_ep_duration,61.8
Learning_rate/Actor,0.00018
Learning_rate/Critic,0.00026
Loss/Actor_loss,-0.01851
Loss/Critic_loss,6.5108
Loss/Entropy_bonus,0.61902
Loss/KL_divergence,0.01866
Loss/Policy_loss,-0.01254
Loss/Regularized_Actor_loss,0.03372


[34m[1mwandb[0m: Agent Starting Run: nvyymnmu with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.004957200393090042
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.007403455859055538
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.01706657462797869
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9776352405257124
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 250, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 5.238344108372022e-06
[34m[1mwandb[0m: 	l2_factor: 4.867134452819901e-05
[34m[1mwandb[0m: 	lre

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.004957200393090042, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.007403455859055538, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.01706657462797869, 'epochs': 10, 'exponential_factor': 0.9776352405257124, 'gamma': 0.9, 'hidden_sizes': [150, 250, 250, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 5.238344108372022e-06, 'l2_factor': 4.867134452819901e-05, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 250, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0049572003930900

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▃▆▄▄▄▄▆▆▆▄▄▄▆▆▆▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇██▇▇▇
Duration/Mean_val_ep_duration,▁▅▄▆▄▅▆▇▇▇▅▄▄▄▇▇▇██▇▇███████████████████
Learning_rate/Actor,██▇▇▆▆▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▃▃▂▂▃▂▂▁▂▃▃▄▃▂▁▁▁▁▂▂▁▂▂▂▂▁▂▂▂▂▁▁▂▂▂▂▂▁▂
Loss/Critic_loss,█▃▂▂▁▂▂▂▂▂▂▃▂▁▂▁▁▁▁▂▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,██▄▄▃▅▃▂▁▁▂▁▂▂▂▁▂▂▂▂▂▁▂▃▂▂▂▂▂▃▂▂▂▂▂▃▃▃▂▂
Loss/KL_divergence,▆█▃▂▅▂▅▂▆▃▇▆▇▁▂▄▆▆▃▄▄▃▆▂▂▃▄▃▄▄▄▄▂▃▃▄▃▃▅▃
Loss/Policy_loss,█▃▃▂▂▃▂▂▁▂▃▃▄▃▂▁▁▁▁▂▂▁▂▂▂▂▁▂▂▂▂▁▁▂▂▂▂▂▁▂
Loss/Regularized_Actor_loss,█▃▃▂▂▃▂▂▁▂▃▃▄▃▂▁▁▁▁▂▂▁▂▂▂▂▁▂▂▂▂▁▁▂▂▂▂▂▁▂

0,1
Duration/Mean_train_ep_duration,102.5
Duration/Mean_val_ep_duration,101.8
Learning_rate/Actor,0.00015
Learning_rate/Critic,0.00022
Loss/Actor_loss,-0.17985
Loss/Critic_loss,0.68331
Loss/Entropy_bonus,0.31254
Loss/KL_divergence,-0.00861
Loss/Policy_loss,-0.17451
Loss/Regularized_Actor_loss,0.00971


[34m[1mwandb[0m: Agent Starting Run: fddzxfoo with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.000637149133195217
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00012636155029998258
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.033338718445943855
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.967362604612464
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00011298675076990076
[34m[1mwandb[0m: 	l2_factor: 2.375812075517843e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.000637149133195217, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00012636155029998258, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.033338718445943855, 'epochs': 10, 'exponential_factor': 0.967362604612464, 'gamma': 0.95, 'hidden_sizes': [350, 150, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.00011298675076990076, 'l2_factor': 2.375812075517843e-05, 'lrelu': 0.001, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150, 150], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.000637149

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▂▂▂▂▂▂▂▃▂▂▄▄▂▃▃▃▃▃▄▂▃▅▄▅▄▆▄▁█▄▄▂▃▃▂▃▃▃▄
Duration/Mean_val_ep_duration,▂▁▂▃▂▆▂▂▅▃▄▃▃▃▁▅▄▃▃█▄▅▄▅▅▃▇▄▄▅▂▆▃▄▄▃▄▅▄▇
Learning_rate/Actor,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▂▁▂▂▃▄▃▃▂▆▅▃▅█▄▃▁▁▅▃▄▃▂▄▆▂▄▂█▄▆▆▃▃▂▄▄▅▄
Loss/Critic_loss,█▅▅▇▃▆▃▃▄▄▁▅▂▄▃▃▃▁▄▂▁▂▂▂▄▃▂▂▁█▂▃▃▁▂▂▂▃▁▃
Loss/Entropy_bonus,█▅▆▆▆▆▅▅▅▄▄▅▁▁▃▄▃▃▄▄▅▅▄▃▃▂▂▂▄▄▄▃▄▄▃▃▃▃▂▂
Loss/KL_divergence,▅▅▅▄▅▆▄▆▇▅▆▄▇█▇▅▆▄▆▇▄▄▄▃▇█▇▄▆█▇▂▄▅▃▅▁▇▄▃
Loss/Policy_loss,▂▂▂▂▃▃▄▃▃▂▆▅▃▅█▄▃▁▁▅▃▄▃▂▄▆▂▃▂█▄▆▆▃▃▂▄▄▄▃
Loss/Regularized_Actor_loss,█▆▅▄▃▃▃▃▂▂▃▂▂▂▃▂▂▁▁▂▁▂▂▁▂▂▁▂▁▂▂▂▂▁▁▁▂▁▂▁

0,1
Duration/Mean_train_ep_duration,89.0
Duration/Mean_val_ep_duration,127.5
Learning_rate/Actor,3e-05
Learning_rate/Critic,1e-05
Loss/Actor_loss,-0.09139
Loss/Critic_loss,6.5122
Loss/Entropy_bonus,1.48754
Loss/KL_divergence,-0.0103
Loss/Policy_loss,-0.0418
Loss/Regularized_Actor_loss,0.12616


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4588ama9 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0003489571514584541
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00013453971024447964
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.005875806148181214
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8761136585214196
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.65026891603412

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.0003489571514584541, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00013453971024447964, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.005875806148181214, 'epochs': 10, 'exponential_factor': 0.8761136585214196, 'gamma': 0.95, 'hidden_sizes': [150, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 2.6502689160341216e-06, 'l2_factor': 0.0003366487940614071, 'lrelu': 0.1, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0003489571514584541, 'a

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▁▁▃▂█▁▄▄▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Duration/Mean_val_ep_duration,▁▁▁▁▄▃█▆▅▄▅▄▅▂▂▂▃▃▃▂▂▂▂▃▃▂▂▂▂▃▃▂▂▂▃▂▂▂▃▃
Learning_rate/Actor,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▅▃▅▆▂▁▂▁▄▄▂▃▃▂▂█▄▂▁▂▁▁▃▂▂▂▁▁▁▂▂▁▃▁▁▂▂▂▂▂
Loss/Critic_loss,█▆▃▃▄▃▂▂▂▃▃▂▂▃▃▂▁▃▁▃▂▃▂▁▃▃▃▂▂▃▂▃▂▂▁▂▂▂▂▂
Loss/Entropy_bonus,█▆▆▅▃▄▃▃▂▁▂▃▃▃▂▄▄▂▃▂▁▁▂▂▂▂▁▂▃▂▁▃▂▂▁▁▂▂▂▁
Loss/KL_divergence,█▆▆█▅▅▆▅▅▅▆▅▅▂▁▆▃▅▃▅▃▃▃▅▅▃▃▄▃▂▃▂▃▃▂▃▂▄▃▃
Loss/Policy_loss,▅▃▅▆▂▁▂▁▄▄▂▃▃▂▂█▄▂▁▂▁▁▃▂▂▂▁▁▁▂▂▁▃▁▁▂▂▂▂▂
Loss/Regularized_Actor_loss,▆▃▅▆▃▂▃▂▄▄▂▃▃▂▂█▄▃▂▂▁▁▃▂▂▂▁▁▁▂▂▁▃▁▁▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,109.375
Duration/Mean_val_ep_duration,205.89999
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.00848
Loss/Critic_loss,10.24656
Loss/Entropy_bonus,0.72862
Loss/KL_divergence,0.00382
Loss/Policy_loss,-0.0042
Loss/Regularized_Actor_loss,0.12478


[34m[1mwandb[0m: Agent Starting Run: 3hxjla7l with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0002692020048286012
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0011000457691878168
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.046153091610309886
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.95845005590938
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 6.330744846833401e-06
[34m[1mwandb[0m: 	l2_factor: 2.2728272919362938e-06
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0002692020048286012, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0011000457691878168, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.046153091610309886, 'epochs': 10, 'exponential_factor': 0.95845005590938, 'gamma': 0.99, 'hidden_sizes': [350, 250, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 6.330744846833401e-06, 'l2_factor': 2.2728272919362938e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 350, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄▃▃▃▃▃▃▄▄▆▆▇▅▆▆▆▇▆▆▇▆▇█▆▇▆▆▇▆▇▆▆▆▇▇▅▇▅▆
Duration/Mean_val_ep_duration,▁▁▁▂▁▂▃▂▄▃▄▇▇▅▅▅▅▇▄▆▆▅▅▇▆▆▇▇▇█▆▆▆▆▆▇▄▇▇▆
Learning_rate/Actor,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▃▄▇█▆█▆▆▄▄▃▂▅▃▁▂▁▂▃▂▃▁▁▂▂▃▁▂▂▁▂▂▃▁▁▃▁▂▂
Loss/Critic_loss,█▂▂▂▂▂▃▂▁▂▂▂▁▂▂▂▁▂▂▂▁▂▁▁▂▂▂▁▂▁▂▁▁▂▁▁▁▁▁▂
Loss/Entropy_bonus,█▇▆▅▅▄▅▅▄▃▃▂▂▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,▇▇▄▅▅▇▇█▆█▆▆▅▄▆▆▇▅▃▇▃▅▅▅▅▃▃▄▃▅▃▄▄▃▃▅▄▄▁▃
Loss/Policy_loss,█▃▄▇█▆█▆▆▄▄▃▂▅▃▁▂▁▂▃▂▃▁▁▂▂▃▁▂▂▁▂▂▃▁▁▃▁▂▂
Loss/Regularized_Actor_loss,█▃▄▇█▆█▆▆▄▄▃▂▅▃▁▂▁▂▃▂▃▁▁▂▂▃▁▂▂▁▂▂▃▁▁▃▁▂▂

0,1
Duration/Mean_train_ep_duration,102.71429
Duration/Mean_val_ep_duration,109.0
Learning_rate/Actor,0.0
Learning_rate/Critic,1e-05
Loss/Actor_loss,8.67928
Loss/Critic_loss,4.73793
Loss/Entropy_bonus,0.95945
Loss/KL_divergence,-0.01844
Loss/Policy_loss,8.72356
Loss/Regularized_Actor_loss,8.77082


[34m[1mwandb[0m: Agent Starting Run: 48ix3zrb with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.006179778012259366
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.004824606345712751
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0003725617620204196
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9988965030099008
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 250, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.1663894993434502e-05
[34m[1mwandb[0m: 	l2_factor: 3.2781333552433187e-06
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.006179778012259366, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.004824606345712751, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0003725617620204196, 'epochs': 10, 'exponential_factor': 0.9988965030099008, 'gamma': 0.9, 'hidden_sizes': [250, 150, 250, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 1.1663894993434502e-05, 'l2_factor': 3.2781333552433187e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 250, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.006

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▆▂█▆▅▇▇▇▇▇▇▇▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇
Duration/Mean_val_ep_duration,▂▁▃██▇█▆▆▇▇▇▇▆▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
Learning_rate/Actor,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
Learning_rate/Critic,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
Loss/Actor_loss,█▅▁▆▂▂▃▁▂▁▂▁▁▂▂▂▂▁▂▁▂▂▂▁▂▁▂▁▂▂▂▂▁▂▁▁▂▁▁▂
Loss/Critic_loss,█▄▂▃▅▃▄▂▁▁▁▁▁▂▃▃▂▁▁▁▁▁▁▂▃▂▁▂▁▁▁▁▂▂▁▂▁▁▁▁
Loss/Entropy_bonus,█▇▇▄▄▄▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,▅▅▅▂█▅▄▂▄▁▄▆▅▄▅▆▆▅▄▅▂▃▅▅▅▄▄▆▆▅▅▄▄▅▆▆▆▅▄▅
Loss/Policy_loss,█▅▁▆▂▂▃▁▂▁▂▁▁▂▂▂▂▁▂▁▂▂▂▁▂▁▂▁▂▂▂▂▁▂▁▁▂▁▁▂
Loss/Regularized_Actor_loss,█▅▁▆▂▂▃▁▂▁▂▁▁▂▂▂▂▁▂▁▂▂▂▁▂▁▂▁▂▂▂▂▁▂▁▁▂▁▁▂

0,1
Duration/Mean_train_ep_duration,129.0
Duration/Mean_val_ep_duration,129.39999
Learning_rate/Actor,0.00568
Learning_rate/Critic,0.00444
Loss/Actor_loss,-1.00918
Loss/Critic_loss,1.13488
Loss/Entropy_bonus,0.03452
Loss/KL_divergence,0.02159
Loss/Policy_loss,-1.00917
Loss/Regularized_Actor_loss,-0.86662


[34m[1mwandb[0m: Agent Starting Run: 8x4mrtvi with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00012169275453672376
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.004302512627791511
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.018241110283041193
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9749493872338776
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 250, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.7385250123977275e-06
[34m[1mwandb[0m: 	l2_factor: 6.284751929735322e-05
[34m[1mw

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.00012169275453672376, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.004302512627791511, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.018241110283041193, 'epochs': 10, 'exponential_factor': 0.9749493872338776, 'gamma': 0.95, 'hidden_sizes': [250, 150, 250, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 2.7385250123977275e-06, 'l2_factor': 6.284751929735322e-05, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 250, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 1024, 'act

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▃▃▄▄▆▅▇██▆▇▇▇▇▆▆▆▄▄▆▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂
Duration/Mean_val_ep_duration,▁▁▂▃▃▆▄▆▅▇█▆▇▆▆▆▇▄▅▄▄▄▄▄▄▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁
Learning_rate/Actor,███▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
Learning_rate/Critic,███▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
Loss/Actor_loss,█▆▇▆▄▃▃▂▂▂▂▂▃▃▂▂▂▁▂▃▃▃▁▂▃▂▁▄▅▅▅▆▇▆▆▅▆▆▆▇
Loss/Critic_loss,█▃▅▄▄▃▃▂▂▂▂▁▂▂▁▁▁▁▁▂▃▂▁▂▂▂▂▂▂▂▁▂▁▁▁▁▁▂▂▂
Loss/Entropy_bonus,████▇▇▇▇▆▆▆▆▅▅▅▄▅▄▄▄▃▃▄▄▄▄▃▃▂▂▂▂▂▁▁▁▂▂▁▁
Loss/KL_divergence,▆▆▆▅▆▇▆▆▆▆▆▆▅▅▆▇▅▅▇▅▄▄▆▄▅▆█▆▆▅▅▄▅▆▅▅▁▄▅▆
Loss/Policy_loss,█▆▇▆▄▃▃▂▂▂▂▂▃▃▂▂▂▁▂▃▃▃▁▂▃▂▁▄▅▅▅▆▇▆▆▅▆▆▆▇
Loss/Regularized_Actor_loss,█▆▇▆▄▃▃▂▂▂▂▂▃▃▂▂▂▁▂▃▃▃▁▂▃▂▁▄▅▅▅▆▇▆▆▅▆▆▆▇

0,1
Duration/Mean_train_ep_duration,54.41177
Duration/Mean_val_ep_duration,54.9
Learning_rate/Actor,3e-05
Learning_rate/Critic,0.00118
Loss/Actor_loss,12.34093
Loss/Critic_loss,2.85105
Loss/Entropy_bonus,0.37546
Loss/KL_divergence,0.00475
Loss/Policy_loss,12.34778
Loss/Regularized_Actor_loss,12.4371


[34m[1mwandb[0m: Agent Starting Run: 3ylgervy with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0012405993154504652
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00032196669792835625
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04997452531330648
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9337345121334832
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 350, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.90465561979168e-05
[34m[1mwandb[0m: 	l2_factor: 0.0005147770722362436
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.0012405993154504652, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00032196669792835625, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04997452531330648, 'epochs': 10, 'exponential_factor': 0.9337345121334832, 'gamma': 0.9, 'hidden_sizes': [350, 150, 350, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 3.90465561979168e-05, 'l2_factor': 0.0005147770722362436, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 350, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0012

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▃█▅▆▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▆▇▇▆▇▇▇▇▇
Duration/Mean_val_ep_duration,▁▄█▅▄▆▆▆▆▇▆▅▅▆▅▆▆▆▆▆▆▆▆▆▆▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆
Learning_rate/Actor,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,██▅▂▃▂▃▃▂▃▅▂▂▂▂▃▃▂▂▁▂▂▂▂▂▁▁▃▃▃▂▄▃▁▂▃▂▄▁▂
Loss/Critic_loss,█▅▄▂▂▂▂▂▁▂▃▂▂▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▂▁▁▂▁▁
Loss/Entropy_bonus,██▇▇▆▄▃▄▃▃▃▂▃▃▂▄▃▃▂▂▂▂▂▂▃▂▂▃▃▂▂▃▂▁▂▂▁▃▁▂
Loss/KL_divergence,▅▆▅▆▆▅▆▅▇▆▇▇▁█▅▅▆▅▅▇▆▆▅▄▆▅▆▃▆▅▅▆▅▅▅▅▄▆▇▆
Loss/Policy_loss,██▅▂▃▂▃▃▂▃▅▂▂▂▂▃▃▂▂▁▂▂▂▂▂▁▁▃▃▃▂▄▃▁▂▃▂▄▁▂
Loss/Regularized_Actor_loss,██▅▂▃▂▃▃▂▃▅▂▂▂▂▃▃▂▂▁▂▂▂▂▂▁▁▃▃▃▂▄▃▁▂▃▂▃▁▂

0,1
Duration/Mean_train_ep_duration,95.88889
Duration/Mean_val_ep_duration,90.1
Learning_rate/Actor,1e-05
Learning_rate/Critic,0.0
Loss/Actor_loss,-2.37723
Loss/Critic_loss,1.14651
Loss/Entropy_bonus,0.53933
Loss/KL_divergence,0.02323
Loss/Policy_loss,-2.35027
Loss/Regularized_Actor_loss,-1.07895


[34m[1mwandb[0m: Agent Starting Run: enpv4s2d with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0003185864450747112
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0009749802489896222
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.032927301101294014
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9464004767371824
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 250, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0005344485465055979
[34m[1mwandb[0m: 	l2_factor: 1.2097712257580445e-05
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.0003185864450747112, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0009749802489896222, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.032927301101294014, 'epochs': 10, 'exponential_factor': 0.9464004767371824, 'gamma': 0.99, 'hidden_sizes': [350, 150, 250, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0005344485465055979, 'l2_factor': 1.2097712257580445e-05, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 250, 250], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄▃▄▃▆▄▄▇▇▆▅▆▅▅▅▆▅▄▆▅▆▆▆▅▅▆▇█▅▆▆▆▆▅▅▆▆▆▅
Duration/Mean_val_ep_duration,▁▂▁▄▃▃▆▅▆█▃▄▅▇▃▆█▆▄▄▅▇▇▅▆▆▅▅▅▆▆▆▅▇▄▄▄▆▅▄
Learning_rate/Actor,█▇▇▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▇▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▄▅▃▄▂▂▂▂▁▃▂▁▂▂▁▂▁▁▁▂▁▁▂▂▃▂▁▁▃▂▁▂▁▃▁▂▂▂▃
Loss/Critic_loss,█▃▃▃▄▂▃▂▂▁▂▁▁▃▂▂▁▂▂▁▂▁▁▁▂▁▁▁▃▂▁▁▂▁▁▂▂▁▁▁
Loss/Entropy_bonus,▁▃▆▇█▇▇▆▇▇▇▆▆▇▆▇▇▅▆▆▆▅▅▅▄▅▇▅▆▆▆▆▆▆▆▆▆▆▆▆
Loss/KL_divergence,█▂▃▂▃▂▁▃▁▁▂▂▁▂▂▂▁▂▃▁▂▂▃▂▁▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂
Loss/Policy_loss,█▄▅▃▄▂▂▂▂▁▃▂▁▂▂▁▂▁▁▁▂▁▁▂▂▃▂▁▁▃▂▁▂▁▃▁▂▂▂▃
Loss/Regularized_Actor_loss,█▄▅▃▄▂▂▂▂▁▃▂▁▂▂▁▂▁▁▁▂▁▁▂▂▃▂▁▁▂▂▁▂▁▃▁▂▂▂▃

0,1
Duration/Mean_train_ep_duration,50.0
Duration/Mean_val_ep_duration,49.4
Learning_rate/Actor,0.0
Learning_rate/Critic,1e-05
Loss/Actor_loss,26.42885
Loss/Critic_loss,5.9072
Loss/Entropy_bonus,1.53334
Loss/KL_divergence,0.00269
Loss/Policy_loss,26.47934
Loss/Regularized_Actor_loss,32.37696


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ykoal2yb with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0007683729850399673
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0031834290132694626
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03910280049020148
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9430100887992224
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.108881948

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0007683729850399673, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0031834290132694626, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03910280049020148, 'epochs': 10, 'exponential_factor': 0.9430100887992224, 'gamma': 0.9, 'hidden_sizes': [350, 150, 150, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.1088819481269017e-06, 'l2_factor': 4.025686261245071e-06, 'lrelu': 0.001, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.000

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄▄▅▆▄█▅▅▅▅▅▅▄▄▄▄▄▄▄▅▄▅▅▅▅▅▅▄▄▅▅▅▅▅▅▅▅▅▅
Duration/Mean_val_ep_duration,▂▁▃▄▆█▄▅▄▅▄▄▄▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▄▄▄▄▄▄▄
Learning_rate/Actor,█▇▇▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▇▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▂▂▄▂▂▃▄▃▄▄▅▅▄▄▅▇▆▆▆▆▇▇▇▆▇▅▆▆▇▇▆▆▆▆▆▇█▇▆
Loss/Critic_loss,█▅▃▂▄▄▄▃▂▃▂▃▂▂▂▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▇▇▆▇▆▆▅▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂
Loss/KL_divergence,▅▆▄█▃▆▃▄▃▃▃▃▁▅▂▃▃▃▃▃▂▃▃▃▂▃▁▃▂▃▃▂▂▂▃▃▂▂▃▂
Loss/Policy_loss,▁▃▁▄▁▁▂▃▁▂▂▅▅▁▁▃▅▄▄▅▄▆▆▅▅▅▃▄▄▆▅▄▄▅▅▅▆█▅▅
Loss/Regularized_Actor_loss,▁▂▂▄▂▂▃▄▃▄▄▅▅▄▄▅▇▆▆▆▆▇▇▇▆▇▅▆▆▇▇▆▆▆▆▆▇█▇▆

0,1
Duration/Mean_train_ep_duration,59.33333
Duration/Mean_val_ep_duration,58.1
Learning_rate/Actor,1e-05
Learning_rate/Critic,2e-05
Loss/Actor_loss,-0.01756
Loss/Critic_loss,1.75419
Loss/Entropy_bonus,0.28842
Loss/KL_divergence,-0.0078
Loss/Policy_loss,-0.00628
Loss/Regularized_Actor_loss,-0.00122


[34m[1mwandb[0m: Agent Starting Run: ucweifxw with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0018597558455479837
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0003802279743328016
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.031443250204864874
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9913587228241542
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.000496235211384677
[34m[1mwandb[0m: 	l2_factor: 0.00022208198245727136
[34m[1mwandb[0m: 	l

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0018597558455479837, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0003802279743328016, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.031443250204864874, 'epochs': 10, 'exponential_factor': 0.9913587228241542, 'gamma': 0.9, 'hidden_sizes': [250, 150, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.000496235211384677, 'l2_factor': 0.00022208198245727136, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00185975584

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▂▂▂▂▃▅▄▃▆▃▅▆▅▇▆▄▇▆▇▅▄▃▅▄▆▇▃▅█▆▄▇▇▃▄▆
Duration/Mean_val_ep_duration,▂▁▁▁▁▁▁▂▅█▄▃▃▃▆▅▅▆▃▃▄▇▅▇▅▄▄▆▆▅▇▆▇▅█▆▅▄▃▃
Learning_rate/Actor,██▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Loss/Actor_loss,█▇▇▇▇▄▃▃▅▃▇▄▂▄▅▄▃▃▄▃▅▃▃▃▃▄▄▆▃▄▃▄▁▃▃▄▄▄▄▄
Loss/Critic_loss,█▄▃▂▂▂▂▂▃▃▃▂▃▃▂▂▂▁▂▂▂▂▁▁▂▂▂▁▂▂▁▁▁▁▁▁▂▂▂▄
Loss/Entropy_bonus,▅▁▁▁▁▃▄▄▆▅▅▆▆▅▅▅▅▇▅▆▆▅▆▇▆▅▇▅▇▆█▇▇██▇█▇▇█
Loss/KL_divergence,█▅▅▅▅▅▅▅▅▇▅▄▄▅▃▅▄▃▅▅▄▄▅▄▅▅▃▆▁▄▄▅▄▅▅▅▅▆▅▅
Loss/Policy_loss,█▄▄▄▄▂▂▂▄▃▆▃▁▃▅▃▂▃▃▃▅▂▃▃▂▃▅▆▃▄▄▅▁▄▄▅▅▄▅▅
Loss/Regularized_Actor_loss,█▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,66.0
Duration/Mean_val_ep_duration,78.0
Learning_rate/Actor,0.00047
Learning_rate/Critic,0.0001
Loss/Actor_loss,-0.02632
Loss/Critic_loss,6.65256
Loss/Entropy_bonus,1.07179
Loss/KL_divergence,0.01312
Loss/Policy_loss,0.00738
Loss/Regularized_Actor_loss,0.00038


[34m[1mwandb[0m: Agent Starting Run: wfj2zvtm with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0038713416832021888
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00022432907397421917
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.010608259143273553
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8681799642388542
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 150, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.1915519044225916e-06
[34m[1mwandb[0m: 	l2_factor: 3.6536657657638328e-06
[34m[1mwand

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.0038713416832021888, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00022432907397421917, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.010608259143273553, 'epochs': 10, 'exponential_factor': 0.8681799642388542, 'gamma': 0.95, 'hidden_sizes': [150, 250, 150, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 3.1915519044225916e-06, 'l2_factor': 3.6536657657638328e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 150, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▂▃▄▄▄▄▅▅▅▆▇█▇▇██▇██▇▇▇▇███▇███▇███████
Duration/Mean_val_ep_duration,▁▂▁▂▄▃▃▄▄▅▃▇▇██▇▇██▇█▇███▇▇█▇▇█▇▇▆█▇█▇▇█
Learning_rate/Actor,█▆▄▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▄▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▆█▅▄▃▄▆▄▃▃▂▁▂▂▂▁▂▂▂▃▃▂▂▃▁▂▁▁▁▂▂▃▂▂▂▂▂▂▁
Loss/Critic_loss,█▃▄▃▂▁▁▂▁▂▃▁▂▂▁▁▁▁▂▂▁▂▂▂▂▂▂▂▁▂▁▂▂▂▁▂▁▁▁▂
Loss/Entropy_bonus,█▅▅▆▅▄▃▄▅▅▆▅▄▄▃▂▂▁▂▂▃▂▂▂▂▂▂▃▁▂▂▂▂▂▂▂▂▃▂▂
Loss/KL_divergence,█▄▇▄▅▄▁▂▂▃▂▂▄▆▃▃▃▃▃▂▁▁▃▂▃▁▄▃▁▂▂▂▂▂▂▂▂▂▂▂
Loss/Policy_loss,█▆█▅▄▃▄▆▄▃▃▂▁▂▂▂▁▂▂▂▃▃▂▂▃▁▂▁▁▁▂▂▃▂▂▂▂▂▂▁
Loss/Regularized_Actor_loss,█▆█▅▄▃▄▆▄▃▃▂▁▂▂▂▁▂▂▂▃▃▂▂▃▁▂▁▁▁▂▂▃▂▂▂▂▂▂▁

0,1
Duration/Mean_train_ep_duration,104.875
Duration/Mean_val_ep_duration,106.7
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-2.65524
Loss/Critic_loss,5.31141
Loss/Entropy_bonus,0.58195
Loss/KL_divergence,-0.00846
Loss/Policy_loss,-2.64907
Loss/Regularized_Actor_loss,-2.61673


[34m[1mwandb[0m: Agent Starting Run: cr2029ko with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.009714704638392302
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0014646310703144909
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04362729356760811
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.893008138932387
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 250, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0002553595598511063
[34m[1mwandb[0m: 	l2_factor: 2.7885447356106316e-05
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.009714704638392302, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0014646310703144909, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04362729356760811, 'epochs': 10, 'exponential_factor': 0.893008138932387, 'gamma': 0.99, 'hidden_sizes': [150, 250, 250, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.0002553595598511063, 'l2_factor': 2.7885447356106316e-05, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 250, 150], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.8, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.009

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▃▃▅▄▆▇█▅▄▇▇▄▅█▆▇▅▆▇▆▅▆▆▆▆▅▆▅▅▆▄▅▅▄▅▅▅
Duration/Mean_val_ep_duration,▁▃▂▁▂▄▄█▆▇▇█▆▇▇▇▇▆▇▇▇▆▆▇▆▆▅▅▅▆▆▆▅▆▆▅▅▆▆▅
Learning_rate/Actor,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▅▇▆█▅▅▄▄▃▂▃▄▄▃▅▂▃▂▁▂▂▁▁▂▂▁▂▁▂▂▂▃▁▂▂▁▂▂▂▁
Loss/Critic_loss,▇▇█▅▄▅▄▃▂▁▃▃▂▂▅▄▄▄▃▃▂▁▁▂▂▁▂▂▂▃▂▂▂▂▂▁▂▂▁▂
Loss/Entropy_bonus,█▇▅▇▇▇▇▆▇▆▆▆▆▅▅▅▅▄▃▃▃▃▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,█▇▄▁▄▄▆▅▄▅▅▄▅▄▄▄▅▅▅▅▄▅▄▄▄▄▄▅▄▅▄▄▄▄▄▄▄▅▄▄
Loss/Policy_loss,▅▇▆█▅▅▄▄▃▂▃▄▄▃▅▂▃▂▁▂▂▁▁▂▂▁▂▁▂▂▂▃▁▂▂▁▂▂▂▁
Loss/Regularized_Actor_loss,▆▇▆█▅▅▄▄▃▂▃▄▄▃▄▂▃▂▁▂▂▁▁▂▂▁▂▁▂▂▂▃▁▂▂▁▂▂▂▁

0,1
Duration/Mean_train_ep_duration,107.0
Duration/Mean_val_ep_duration,92.5
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.56111
Loss/Critic_loss,7.56201
Loss/Entropy_bonus,0.69237
Loss/KL_divergence,-0.00215
Loss/Policy_loss,-0.5309
Loss/Regularized_Actor_loss,0.71093


[34m[1mwandb[0m: Agent Starting Run: dld7dnke with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.00020055342134815744
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.002891488853364463
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.004842289459843772
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.89356309053321
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 250, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.5131024662470609e-06
[34m[1mwandb[0m: 	l2_factor: 7.019108441227556e-05
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.00020055342134815744, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.002891488853364463, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.004842289459843772, 'epochs': 10, 'exponential_factor': 0.89356309053321, 'gamma': 0.9, 'hidden_sizes': [150, 250, 250, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.5131024662470609e-06, 'l2_factor': 7.019108441227556e-05, 'lrelu': 0.1, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 250, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.1, 'bn': False, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0002005

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▃▂▃▄▄▄▇▅█▇▆█▇▆█▇███▇██▇▆█▇████▇█▇█▇█▇
Duration/Mean_val_ep_duration,▁▂▁▁▁▂▂▃▅▆▆▇████▇▇▇▆██▇▇▇██▇█▆██████▇▇▇▇
Learning_rate/Actor,█▇▅▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▇▅▇█▅▅▄▆▃▂▄▃▂▃▂▁▁▄▂▂▃▃▂▃▂▁▁▂▂▁▂▂▂▃▁▃▂▃▂▃
Loss/Critic_loss,█▃▃▄▂▃▃▃▃▂▃▂▂▃▁▁▂▃▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▂▁▂▂▂▁▁
Loss/Entropy_bonus,█▇▆▆▄▄▃▃▃▂▂▂▂▂▂▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,▇█▄▆▄▄▅▅▅▅▃▂▂▂▂▃▄▃▄▂▃▁▂▃▂▂▂▄▄▃▂▃▃▄▁▂▃▃▃▃
Loss/Policy_loss,▇▅▇█▅▅▄▆▃▂▄▃▂▃▂▁▁▄▂▂▃▃▂▃▂▁▁▂▂▁▂▂▂▃▁▃▂▃▂▃
Loss/Regularized_Actor_loss,▇▅▇█▅▅▄▆▃▂▄▃▂▃▂▁▁▄▂▂▃▃▂▃▂▁▁▂▂▁▂▂▂▃▁▃▂▃▂▃

0,1
Duration/Mean_train_ep_duration,118.28571
Duration/Mean_val_ep_duration,114.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,1.00871
Loss/Critic_loss,1.97297
Loss/Entropy_bonus,0.76469
Loss/KL_divergence,-0.02337
Loss/Policy_loss,1.01241
Loss/Regularized_Actor_loss,1.11611


[34m[1mwandb[0m: Agent Starting Run: r4p8z4sh with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.004227650480453153
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.004722237754422292
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04255686748777848
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9470193335949972
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001111048773894366
[34m[1mwandb[0m: 	l2_factor: 4.7528510920462394e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.004227650480453153, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.004722237754422292, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04255686748777848, 'epochs': 10, 'exponential_factor': 0.9470193335949972, 'gamma': 0.9, 'hidden_sizes': [150, 350, 150, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0001111048773894366, 'l2_factor': 4.7528510920462394e-05, 'lrelu': 0.001, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.0042276

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▃▃▁▁▁▁▃▁▁▁▁▁▅█▁▂▅▃▃▃▃▃▅▁▃▁▁▁▁▂▄▄▅▄▄▄▅▄▅▄
Duration/Mean_val_ep_duration,▄▁▁▁▁▃▁▁▁▁▁▅█▁▃▅▃▃▃▃▃▅▁▃▁▁▁▁▂▃▄▃▄▄▄▄▄▄▄▄
Learning_rate/Actor,██▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁█▅▅▅▄▁▅▅▅▅▄▁▁▄▄▂▂▃▂▂▂▁▃▂▅▄▄▄▂▂▁▁▂▂▂▂▂▂▂
Loss/Critic_loss,▂▁▃▁▁▁▂▁▁▁▁▁▂▂▁█▂▂▂▂▂▂▂▃▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Entropy_bonus,▆▂▁▁▁█▁▁▁▁▁▁▂▁▆▁▁▁▁▁▁▁▇▂▃▃▃▄▄▄▄▃▄▃▃▄▄▄▄▃
Loss/KL_divergence,▄▄▃▂▂▂▂▂▂▂▂▂▂▄▂▆▂▂▂▂▂▂▂█▂▃▃▁▃▃▃▃▃▃▃▃▃▃▃▃
Loss/Policy_loss,▁█▅▅▅▄▁▅▅▅▅▄▁▁▄▄▂▂▃▂▂▂▁▃▂▅▄▄▄▂▂▁▁▂▂▂▂▂▂▂
Loss/Regularized_Actor_loss,▁█▅▅▅▄▁▅▅▅▅▄▁▁▄▄▂▂▃▂▂▂▁▃▂▅▄▄▄▂▂▁▁▁▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,33.09091
Duration/Mean_val_ep_duration,29.0
Learning_rate/Actor,0.00041
Learning_rate/Critic,0.00045
Loss/Actor_loss,25.55655
Loss/Critic_loss,22.5149
Loss/Entropy_bonus,0.49927
Loss/KL_divergence,0.04918
Loss/Policy_loss,25.5778
Loss/Regularized_Actor_loss,26.07777


[34m[1mwandb[0m: Agent Starting Run: aq12vc3c with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00017998524054240066
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.007473688276785075
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.035500560212916345
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.998385340901956
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00024569552643950997
[34m[1mwandb[0m: 	l2_factor: 7.18356172910808e-05
[34m[1mwandb[0m: 	lrel

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.00017998524054240066, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.007473688276785075, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.035500560212916345, 'epochs': 10, 'exponential_factor': 0.998385340901956, 'gamma': 0.95, 'hidden_sizes': [250, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.00024569552643950997, 'l2_factor': 7.18356172910808e-05, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.99, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.00017998524054240

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▃▃▃▃▃▃▃▄▄▃▅▃▄▄▆█▆▃▆▃▃▃▄▄▅▂▇▅██▅▅▅█▅▅▂
Duration/Mean_val_ep_duration,▁▂▂▂▂▂▃▄▄▃▃▃▇▄▅▂▄▇▆▅▂▂▄▃▅▆▅▅▅▃▆▄▅█▅▅▆▃▅▆
Learning_rate/Actor,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
Learning_rate/Critic,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
Loss/Actor_loss,▁▄▇▇▆▅▅▆▆▅█▇▅▆▆▆▇▆▆▅▂▅▄▄▃▃▅▅▄▄▄█▆▇▆▅▄▆▃▅
Loss/Critic_loss,▂█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▆▃▁▁▁▂▁▂▂▂▂▃▃▂▃▃▄▄▄▅▆▆▆▅▆▆▆▅▅▄▄▄▄▅▅▅▅▅▆
Loss/KL_divergence,▅▁▂▃▃▆▇▆██▇▇▆▇▇▇▇▇▇▆▅▆▆▅▆▆▅▅▆▆▅▇▅▇▇▅▆▇▅▇
Loss/Policy_loss,▁▅▇▇▆▄▅▆▆▅█▇▅▆▆▆▆▅▆▅▂▅▄▄▃▃▅▅▄▄▄█▇▇▆▅▄▆▃▅
Loss/Regularized_Actor_loss,██▇▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,39.0
Duration/Mean_val_ep_duration,95.0
Learning_rate/Actor,0.00016
Learning_rate/Critic,0.00684
Loss/Actor_loss,-0.08387
Loss/Critic_loss,8.42558
Loss/Entropy_bonus,1.6764
Loss/KL_divergence,0.01038
Loss/Policy_loss,-0.02436
Loss/Regularized_Actor_loss,0.19668


[34m[1mwandb[0m: Agent Starting Run: doiq7s0c with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0008980291084318372
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00013006066403488205
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04134412138826988
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8790557798450933
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 9.0484727547071e-06
[34m[1mwandb[0m: 	l2_factor: 0.0009097125742153968
[34m[1mwandb[0m: 	lrel

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0008980291084318372, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00013006066403488205, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04134412138826988, 'epochs': 10, 'exponential_factor': 0.8790557798450933, 'gamma': 0.99, 'hidden_sizes': [150, 350, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 9.0484727547071e-06, 'l2_factor': 0.0009097125742153968, 'lrelu': 0.001, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.000898029108431837

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▃▄▆▃▃▅▆▅▄▇▅▅▆▅▇▅▇▇▇▇▇▇▄▆▆▅▄▆▅▇█▆▇█▆▆▆
Duration/Mean_val_ep_duration,▁▃▅▅▄▆▅▄▅▄▅▅▆█▇▅▆▄▆▅▆▆▆▅▆▆▆▇▅▅▇▇▅▇█▅█▅▆▇
Learning_rate/Actor,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▃▄▂▇▆▄▇▄▅▄▆▅▇█▇█▇█▇▇█▇▇▆█▆▆█▇▆▆▅▅▇▅▅▇▆▆
Loss/Critic_loss,█▆▄▃▃▂▃▃▂▂▃▃▂▃▃▁▁▂▂▁▁▁▂▁▂▂▂▂▂▂▂▂▃▁▂▁▁▃▂▂
Loss/Entropy_bonus,█▇▆▄▄▃▂▁▃▂▃▂▃▂▁▂▂▃▂▃▃▂▂▂▃▃▂▂▂▂▃▂▂▃▂▂▂▂▂▂
Loss/KL_divergence,▄▇▇▇▇▇██▁▄▃▅▄▃▂▅▆▃▄▇▃▅▄▅▃▄▄▅▄▂▆▄▃▄▄▅▄▃▄▄
Loss/Policy_loss,▁▄▄▁█▅▃▆▃▄▄▅▄▆▇▇█▆█▇▇█▇▇▆█▅▅█▆▅▅▄▄▇▅▄▇▆▆
Loss/Regularized_Actor_loss,█▅▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,149.60001
Duration/Mean_val_ep_duration,177.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.05852
Loss/Critic_loss,10.23569
Loss/Entropy_bonus,1.35582
Loss/KL_divergence,-0.00228
Loss/Policy_loss,-0.00246
Loss/Regularized_Actor_loss,0.43546


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: et0v74hy with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0029033063094051305
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.000683006572710053
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.034065828907865206
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.980925189282505
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 7.50589

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.0029033063094051305, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.000683006572710053, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.034065828907865206, 'epochs': 10, 'exponential_factor': 0.980925189282505, 'gamma': 0.99, 'hidden_sizes': [350, 350, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 7.505891016580974e-06, 'l2_factor': 0.0002676275193260633, 'lrelu': 0.1, 'minibatch_size': 256, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 350, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▆▇▄▅▄▆▅▆▄█▇▇▄▇▄▇▄▇▄▇▄▆▇▄▄▄▄▄▄▇▄▄▆▆▆▄
Duration/Mean_val_ep_duration,▁▁▁▁▂▁▁▁▁▂▁▁▂▂▁▂▁▂▁▅▁▁▂▂▁█▁▁▁█▂▂▁▂▁▁▂▂▂▁
Learning_rate/Actor,██▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Loss/Actor_loss,█▆▆▅▂▂▃▁▂▂█▂▆▁▂▂▂█▂▃▂▂▂▁▃▂▂▂▂▂▄▁▂▂▂▂▁▃▂▂
Loss/Critic_loss,█▄▃▃▅▄▃▂▅▃▄▂▄▃▃▃▃▃▂▂▃▂▃▄▃▂▄▃▂▂▁▄▃▂▃▂▃▃▃▄
Loss/Entropy_bonus,█▇▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,▆█▅▅▂▁▄▁▃▃▃▁▃▁▁▂▁▃▁▂▂▁▃▁▂▂▁▂▁▁▁▁▁▂▂▂▁▃▂▁
Loss/Policy_loss,█▆▆▅▂▂▂▁▂▁▇▂▆▁▂▂▂▇▂▃▂▂▂▁▂▂▂▁▁▂▄▁▁▂▂▂▁▃▂▂
Loss/Regularized_Actor_loss,█▇▇▆▅▅▄▃▄▃▅▃▅▃▃▂▂▄▂▂▂▂▂▁▂▂▂▁▁▁▂▁▁▁▁▁▁▂▁▁

0,1
Duration/Mean_train_ep_duration,67.0
Duration/Mean_val_ep_duration,67.0
Learning_rate/Actor,0.00061
Learning_rate/Critic,0.00014
Loss/Actor_loss,-0.00833
Loss/Critic_loss,16.68449
Loss/Entropy_bonus,0.01046
Loss/KL_divergence,-0.00382
Loss/Policy_loss,-0.00797
Loss/Regularized_Actor_loss,0.09279


[34m[1mwandb[0m: Agent Starting Run: fkqp375b with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0005180656404794369
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0016204377952108307
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.005761529568735706
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8838766736775941
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001588257060834471
[34m[1mwandb[0m: 	l2_factor: 6.07911102117607e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.0005180656404794369, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0016204377952108307, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.005761529568735706, 'epochs': 10, 'exponential_factor': 0.8838766736775941, 'gamma': 0.95, 'hidden_sizes': [150, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.0001588257060834471, 'l2_factor': 6.07911102117607e-05, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.8, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.00051806564

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄▅▇▇▇▇▆▇▇▇▆▆▆▇▇▇▆▇▇▇▆▇▇▇▇▇▆▆▆██▇█▇▆▇▇▇█
Duration/Mean_val_ep_duration,▁▁▃▇█▅▃▂▆▅▆▇▆▇▅▅▂▆█▅▆▇▅██▇▅▅▅▇▆▆█▆▆▅▇▇▅▅
Learning_rate/Actor,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▄▁▅▃▆▄▄▄▄▅▆▅▇▅▅▆▆▇▆▆▆▆▇▇▆▇▇▅▇▇▆▆▇▅▆█▇▇█
Loss/Critic_loss,█▄▃▃▂▃▂▂▂▁▂▁▂▁▁▁▁▁▁▁▂▁▁▁▂▁▁▁▂▁▂▁▁▁▂▂▁▁▁▁
Loss/Entropy_bonus,█▇▅▅▄▃▃▂▂▂▂▁▂▂▁▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▁▁▁▁▂
Loss/KL_divergence,▃▄▃▂▁▂▂▅▄█▁▁▅▂▃▂▁▂▁▁▁▂▂▁▁▁▂▂▂▁▂▁▂▁▁▁▂▂▁▁
Loss/Policy_loss,▁▄▁▅▂▆▄▄▄▃▅▆▅▇▅▅▆▆▇▆▆▆▆▇▇▆▇▇▅▆▇▆▆▇▄▆█▇▇█
Loss/Regularized_Actor_loss,█▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,56.25
Duration/Mean_val_ep_duration,54.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.00902
Loss/Critic_loss,5.4695
Loss/Entropy_bonus,1.32785
Loss/KL_divergence,-0.00158
Loss/Policy_loss,-0.00137
Loss/Regularized_Actor_loss,0.15286


[34m[1mwandb[0m: Agent Starting Run: 60nrelji with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.004052579156120559
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0034942502153542236
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04496687499725916
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9401843961074492
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 350, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 5.568161181612745e-06
[34m[1mwandb[0m: 	l2_factor: 1.886186219426817e-05
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.004052579156120559, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0034942502153542236, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04496687499725916, 'epochs': 10, 'exponential_factor': 0.9401843961074492, 'gamma': 0.99, 'hidden_sizes': [250, 150, 350, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 5.568161181612745e-06, 'l2_factor': 1.886186219426817e-05, 'lrelu': 0.001, 'minibatch_size': 32, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 350, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0040525

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▃▃▂▂▃▃▃▄▆▄▃▄▄▄▅█▄▄▄▄▃▅▃▅▆▅▅▅▅▄▄▄▆▄▅▄
Duration/Mean_val_ep_duration,▁▁▂▂▂▃▂▃▄▃▄▄▆▄▄▆▅▄▄▅▇▆▆▇▄▄▆▆▇▅▄▅▆▆▆█▅▇▆▇
Learning_rate/Actor,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃▃█▃▃▂▅▃▄▇▆▄▆▄▅▄▃▆▆▅▂▁▅▃▆▂▆▄▄▄▁▇▃▅▅▄▇▄▄▄
Loss/Critic_loss,█▆▃▃▂▃▂▂▂▄▃▂▁▃▃▃▃▃▂▂▂▂▃▃▅▄▃▁▃▂▂▂▂▂▁▃▂▅▂▂
Loss/Entropy_bonus,█▅▂▃▄▄▂▁▁▁▁▁▃▁▂▃▂▂▂▄▄▄▄▃▃▃▃▂▂▃▃▃▃▃▄▄▄▃▃▃
Loss/KL_divergence,▃▄█▄▆▃▃▂▄▅▁▃▂▅▄▂▄▆▄▃▅▃▇▃▆▂▂▅▃▃▂▂▃▄▄▃▅▂▄▃
Loss/Policy_loss,▄▃█▃▃▂▅▃▄▇▆▄▆▃▅▄▃▆▆▅▂▁▅▃▆▂▆▄▄▄▁▇▂▅▅▄█▄▄▄
Loss/Regularized_Actor_loss,▇▅█▄▃▃▅▃▄▆▆▃▅▃▄▃▃▅▅▄▂▁▅▂▅▂▅▄▃▃▁▆▂▄▄▃▆▄▃▃

0,1
Duration/Mean_train_ep_duration,90.22222
Duration/Mean_val_ep_duration,153.2
Learning_rate/Actor,1e-05
Learning_rate/Critic,1e-05
Loss/Actor_loss,-0.08012
Loss/Critic_loss,6.49877
Loss/Entropy_bonus,1.57689
Loss/KL_divergence,-0.00039
Loss/Policy_loss,-0.00921
Loss/Regularized_Actor_loss,-0.06791


[34m[1mwandb[0m: Agent Starting Run: 83qhrogs with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.008144618098424513
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0002600125533695972
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0016039393939105925
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.975944427020444
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 350, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.24685188659723e-05
[34m[1mwandb[0m: 	l2_factor: 4.586853145018579e-05
[34m[1mwandb[0m

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.008144618098424513, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0002600125533695972, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0016039393939105925, 'epochs': 10, 'exponential_factor': 0.975944427020444, 'gamma': 0.95, 'hidden_sizes': [350, 150, 350, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 3.24685188659723e-05, 'l2_factor': 4.586853145018579e-05, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 350, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': True, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00814461

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▂▂▁▂▂▂▄▂▄▄▄▄▄▅▄██▃▂▁▁▂▂▂▂▂▂▂▁▂▂▂▂▁▁▃
Duration/Mean_val_ep_duration,▂▂▂▁▂▁▂▂▁▃▂▃▃▄▃█▄▄▃█▇▅▂▁▁▂▁▂▂▂▂▂▁▂▂▂▁▁▃▃
Learning_rate/Actor,██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Loss/Actor_loss,▅▅▅▅▇▄█▅▅▂▄▃▃▂▂▂▄▂▃▁▁▁▃▃▅▅▃▄▃▅▄▆▆▅▅▄▅▄▄▄
Loss/Critic_loss,▅▄▄▅▃▂█▃▂▅▃▂▃▃▂▄▁▁▂▁▄▃▃▂▄▄▁▂▂▂▁▂▁▁▂▁▁▁▁▅
Loss/Entropy_bonus,█▄▂▁▁▁▂▁▂▂▂▂▁▁▂▂▁▁▁▁▁▂▁▁▁▂▁▁▁▁▁▁▂▁▂▂▂▂▁▁
Loss/KL_divergence,▆▁█▅█▅▇▆▅▅▆▆▆▅▅▆▆▆▇▄▆▅▅▄▅▄▄▅▃▅▅▅▆▆▂▅▅▃▄▅
Loss/Policy_loss,▅▅▅▅▇▄█▅▅▂▄▃▃▂▂▂▄▂▃▁▁▁▃▃▅▅▃▄▃▅▄▆▆▅▅▄▅▄▄▄
Loss/Regularized_Actor_loss,▅▅▅▅▇▄█▅▅▂▄▃▃▂▂▂▄▂▃▁▁▁▃▃▅▅▃▄▃▅▄▆▆▅▅▄▅▄▄▄

0,1
Duration/Mean_train_ep_duration,78.0
Duration/Mean_val_ep_duration,93.6
Learning_rate/Actor,0.00184
Learning_rate/Critic,6e-05
Loss/Actor_loss,5.1959
Loss/Critic_loss,12.96979
Loss/Entropy_bonus,0.09517
Loss/KL_divergence,0.03039
Loss/Policy_loss,5.19605
Loss/Regularized_Actor_loss,6.30083


[34m[1mwandb[0m: Agent Starting Run: ke7ly39d with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.003206761128930413
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00020650196370523735
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.002727261496555375
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9717885516581692
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 250, 250, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.263922585940307e-06
[34m[1mwandb[0m: 	l2_factor: 0.00013152556125165007
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.003206761128930413, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00020650196370523735, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.002727261496555375, 'epochs': 10, 'exponential_factor': 0.9717885516581692, 'gamma': 0.9, 'hidden_sizes': [250, 250, 250, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.263922585940307e-06, 'l2_factor': 0.00013152556125165007, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 250, 250, 250], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.00320676

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▄▆▆▅▅▅▄▄▇▇▆▄▄▃▃▃▄▄▃▃▃▃▃▃█▃▃▃▄▃▄▃▃▂▄▅▇█
Duration/Mean_val_ep_duration,▄▅▆▆▄▅▅▄▄▇█▇▄▄▄▄▄▄▄▁▄▃▃▃▃▆▄▃▃▃▃▄▃▃▂▄▄▅▆▇
Learning_rate/Actor,██▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
Learning_rate/Critic,██▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
Loss/Actor_loss,▃▅▂▃▂▃▃▄▃▃▁▂▅▄▄▄▂▄▄▅▃█▄▆▄▅▃▅▄▅▄▅▅▆▅▅▄▂▁▂
Loss/Critic_loss,▆▇▄▅▃▂▅▄▆▄▃▅▄█▂▁▂▃▃▃▂▄▃▅▄▂▆▆▂▃▃▃▃▄▅▅▇▅▄▄
Loss/Entropy_bonus,█▆▅▄▄▄▃▂▃▂▃▃▂▂▁▁▂▂▂▂▁▁▁▂▁▂▃▁▂▂▂▂▂▁▁▂▂▁▁▁
Loss/KL_divergence,▄▅▅▄▄▄▃▆▁▃▄▄▄▂▃▃▃▂▄▄▃▅▅▂▃▃█▄▂▄▃▄▄▄▄▁▅▅▄▃
Loss/Policy_loss,▃▅▂▃▂▃▃▄▃▃▁▂▅▄▄▄▂▄▄▅▃█▄▆▄▅▃▅▄▅▄▅▅▆▅▅▄▂▁▂
Loss/Regularized_Actor_loss,▃▅▂▃▂▃▃▄▃▃▁▂▅▄▄▄▂▄▄▅▃█▄▆▄▅▃▅▄▅▄▅▅▆▅▅▄▂▂▂

0,1
Duration/Mean_train_ep_duration,104.22222
Duration/Mean_val_ep_duration,107.4
Learning_rate/Actor,0.00099
Learning_rate/Critic,6e-05
Loss/Actor_loss,0.89316
Loss/Critic_loss,6.93656
Loss/Entropy_bonus,0.03836
Loss/KL_divergence,-0.00406
Loss/Policy_loss,0.89327
Loss/Regularized_Actor_loss,1.36824


[34m[1mwandb[0m: Agent Starting Run: r6ce1nsa with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0004728620279550953
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0029223360328311993
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04244329235857548
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9844045459864492
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 250, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00024544710003721976
[34m[1mwandb[0m: 	l2_factor: 0.00035550788327161603
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.0004728620279550953, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0029223360328311993, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04244329235857548, 'epochs': 10, 'exponential_factor': 0.9844045459864492, 'gamma': 0.9, 'hidden_sizes': [350, 150, 250, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.00024544710003721976, 'l2_factor': 0.00035550788327161603, 'lrelu': 0.001, 'minibatch_size': 32, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 250, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▃▃▃▆▅▅▄▇▄▇▆▇█▆▃▇▃▅▄▅▄▄▃▃▅▄▄▄▄▃▅▄▄▅▄█
Duration/Mean_val_ep_duration,▂▁▂▂▂▆▇▅▄▄▄▄█▄▅▄▄▆▅▄▄▅▃▃▃▃▃▃▄▃▃▄▃▃▃▃▃▄▄▅
Learning_rate/Actor,███▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
Learning_rate/Critic,███▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
Loss/Actor_loss,▂▁▁█▃▂▃▃▂▃▂▁▁▂▃▃▃▂▃▃▂▂▁▃▂▂▃▂▂▄▂▃▃▂▂▄▄▂▄▂
Loss/Critic_loss,█▆▃▃▃▃▃▄▃▂▂▂▃▁▂▂▁▁▂▂▃▁▁▂▂▂▁▁▃▂▁▂▂▁▁▃▁▁▂▂
Loss/Entropy_bonus,██▇▆▆▅▆▅▆▆▆▅▅▆▆▆▆▆▅▆▅▄▄▄▄▄▃▂▂▂▁▂▂▂▃▂▂▃▃▄
Loss/KL_divergence,▄▃▅█▄▆▄▃▃▄▄▃▇▃▃▂▅▃▄▂▆▄▄▅▄▂▇▃▄▅▅▄▃▁▂▃▄▄▁▂
Loss/Policy_loss,▂▁▁█▃▂▃▃▂▃▂▁▁▂▂▃▃▂▃▃▂▂▁▂▂▂▂▁▂▃▁▃▂▂▁▃▃▁▃▂
Loss/Regularized_Actor_loss,█▄▃▃▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,145.0
Duration/Mean_val_ep_duration,94.1
Learning_rate/Actor,0.0002
Learning_rate/Critic,0.00121
Loss/Actor_loss,-0.09323
Loss/Critic_loss,3.24501
Loss/Entropy_bonus,1.34062
Loss/KL_divergence,-0.017
Loss/Policy_loss,-0.03633
Loss/Regularized_Actor_loss,-0.07116


[34m[1mwandb[0m: Agent Starting Run: mg0tz5ck with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0052650493798370515
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.006817389482693877
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.022634920610638997
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.937297037733714
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 250, 250, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001701640774272107
[34m[1mwandb[0m: 	l2_factor: 7.583502565084314e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0052650493798370515, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.006817389482693877, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.022634920610638997, 'epochs': 10, 'exponential_factor': 0.937297037733714, 'gamma': 0.99, 'hidden_sizes': [250, 250, 250, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0001701640774272107, 'l2_factor': 7.583502565084314e-05, 'lrelu': 0.1, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 250, 250, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.005265049379

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▁▄▃▇▁▃▃▄▂▄▂▄▇▂▃▄▇▅▅▅▇▆▇▅▇▃▄█▆▆▇▆▇▇███▆▄
Duration/Mean_val_ep_duration,▁▂▂▃▄▂▂▃▆▄▃▄▂▄▃▄▆▄▅▆▇▆▆█▆▄▅▅▇█▆█▇▆█▆██▇▆
Learning_rate/Actor,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▃▃▄▄▃▄▂▃▃▆▂▂▃▄▄▁▅▃▃▄▄▄▄▃▄▅▄▄▄▄▅▅▄▆▄▅▅▄▄
Loss/Critic_loss,█▆▄▄▄▇▂▄▄▄▅▃▃▃▃▅▄▅▃▂▃▂▂▁▂▃▂▃▂▂▂▂▄▄▃▂▃▂▃▄
Loss/Entropy_bonus,▇▇█▅▄▆▆▅▅▆▄▄▅▄▅▄▃▃▃▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▂▂▁▂▂▂
Loss/KL_divergence,█▅▃▇▆▆▅▁▅▅▇▂▂▂▅▃▂▂▄▃▄▄▄▂▃▆▅▄▃▂▂▆▃▄▂▂▃▅▃▁
Loss/Policy_loss,█▃▃▄▄▃▄▂▃▄▆▂▂▃▄▄▁▅▃▃▄▃▄▃▃▄▅▄▃▄▄▄▄▄▅▄▄▅▄▄
Loss/Regularized_Actor_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,74.625
Duration/Mean_val_ep_duration,114.2
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.03733
Loss/Critic_loss,11.3841
Loss/Entropy_bonus,1.39598
Loss/KL_divergence,-0.00775
Loss/Policy_loss,-0.00573
Loss/Regularized_Actor_loss,0.02876


[34m[1mwandb[0m: Agent Starting Run: vicrs3fw with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0018352252924859769
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00018537676531231489
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.040986889057491725
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.932728628102976
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 4.135279870320934e-06
[34m[1mwandb[0m: 	l2_factor: 1.424698266542142e-06
[34m[1mwandb[0m: 	lrelu: 0.

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0018352252924859769, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00018537676531231489, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.040986889057491725, 'epochs': 10, 'exponential_factor': 0.932728628102976, 'gamma': 0.9, 'hidden_sizes': [350, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 4.135279870320934e-06, 'l2_factor': 1.424698266542142e-06, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0018352252924859769, 'adv_std

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▄▁██████████████████████████████████████
Duration/Mean_val_ep_duration,▁███████████████████████████████████████
Learning_rate/Actor,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▂▅▄▄▅▇▇█▇▄▅██▅▇▇▇█▇▇██▇▇▇█▇█▇███▇▆▇█▇█▇
Loss/Critic_loss,▄█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▆▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▁▂▁▂▂▂▂▂▂
Loss/KL_divergence,█▅▁▂▂▃▅▅▅▅▄▄▆▇▆▆▇▇▇████▇▇▇███▇███▇▇▇███▇
Loss/Policy_loss,▁▂▄▂▂▃▆▅▇▆▁▃█▇▂▇▅▆█▇▇██▆▇▆▇▇█▇█▇▇▇▅▆█▇█▇
Loss/Regularized_Actor_loss,▁▂▅▄▄▅▇▇█▇▄▅██▅▇▆▇█▇▇██▇▇▇▇▇█▇██▇▇▆▇█▇█▇

0,1
Duration/Mean_train_ep_duration,36.5
Duration/Mean_val_ep_duration,36.2
Learning_rate/Actor,7e-05
Learning_rate/Critic,1e-05
Loss/Actor_loss,-0.02518
Loss/Critic_loss,8.60108
Loss/Entropy_bonus,0.39589
Loss/KL_divergence,-0.0064
Loss/Policy_loss,-0.00896
Loss/Regularized_Actor_loss,0.00068


[34m[1mwandb[0m: Agent Starting Run: e56xosey with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0003890321865708933
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0029868783807762618
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.019452482745950593
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.992071384721833
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.3360839403032858e-06
[34m[1mwandb[0m: 	l2_factor: 3.508372418204239e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.0003890321865708933, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0029868783807762618, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.019452482745950593, 'epochs': 10, 'exponential_factor': 0.992071384721833, 'gamma': 0.9, 'hidden_sizes': [350, 150, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 1.3360839403032858e-06, 'l2_factor': 3.508372418204239e-05, 'lrelu': 0.1, 'minibatch_size': 256, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.99, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.000389032186

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄▃▆▇█▇▆███▇█▇▇█▇▇▇██▇▇▇▇▇▇███████▇▇▇█▇▇
Duration/Mean_val_ep_duration,▃▁▃▃▆▄▇▇▇▇▇▇▇▇▇▇▇▇▆▇▆▆▆▇▆▇▇▅▇█▇▇▇▆▆▇▇▆▇█
Learning_rate/Actor,███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
Learning_rate/Critic,███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
Loss/Actor_loss,▄█▇▃▃▂▃▃▃▂▁▂▃▃▂▃▃▁▂▂▂▂▃▃▃▁▁▂▁▃▂▃▂▂▂▂▂▂▃▂
Loss/Critic_loss,▇█▃▂▂▂▂▂▂▁▁▁▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▆▅▅▆▅▅▄▃▂▂▂▂▃▃▂▂▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▁▁▁▂▁▂▂▁
Loss/KL_divergence,▇█▆▆▄▆▄▆▇▆▁▄▃▆▆▄▆▅▄▃▂▃▄▃▆▄▃▄▃▄▃▄▆▂▃▄▅▃▄▄
Loss/Policy_loss,▄█▇▃▃▂▃▃▃▂▁▂▃▃▂▃▃▁▂▂▂▂▃▃▃▁▁▂▁▃▂▃▂▂▂▂▂▂▃▂
Loss/Regularized_Actor_loss,▄█▇▃▃▂▃▃▃▂▁▂▃▃▂▃▃▁▂▂▂▂▃▃▃▁▁▂▁▃▂▃▂▂▂▂▂▂▃▂

0,1
Duration/Mean_train_ep_duration,51.11111
Duration/Mean_val_ep_duration,55.5
Learning_rate/Actor,0.00025
Learning_rate/Critic,0.00188
Loss/Actor_loss,6.54048
Loss/Critic_loss,1.69058
Loss/Entropy_bonus,0.52347
Loss/KL_divergence,-0.00512
Loss/Policy_loss,6.55066
Loss/Regularized_Actor_loss,6.57023


[34m[1mwandb[0m: Agent Starting Run: jhmxypqc with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0029937580820940737
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004167242233592737
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.007037513330415272
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9919659998497216
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 250, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00031764585669922873
[34m[1mwandb[0m: 	l2_factor: 0.0006572605363383026
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.0029937580820940737, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004167242233592737, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.007037513330415272, 'epochs': 10, 'exponential_factor': 0.9919659998497216, 'gamma': 0.99, 'hidden_sizes': [150, 350, 250, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.00031764585669922873, 'l2_factor': 0.0006572605363383026, 'lrelu': 0.001, 'minibatch_size': 32, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 250, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.002993

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁█▇▆▇▇▇▇▇█▇▇█▇▇▇▆▇▇▇█▇▇▇▇███▇██████▇█▇▇█
Duration/Mean_val_ep_duration,▁▁▅▂▄▇▄█▇▆▄▆▆▆▅▄▇▅▆▆▅▇▆▆▅▆▇▆▆▆▇▇▇▆▅▅▇▆▆▆
Learning_rate/Actor,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Loss/Actor_loss,▅▇▃█▄▆▇▁▅▄▃▅▆▅▇▆▅▄▇▂▆██▄▄▆▅▆▆█▄▇▅▄▆▄▄▇▆▅
Loss/Critic_loss,▁▅▄▅▄█▄▅▇▅▄▃▅▄▃▃▅▆▅▄▄▇▃▃▃▃▃▃▄▃▄▅▂▅▃▄▂▃▃▂
Loss/Entropy_bonus,█▇▇▆▅▅▅▅▅▅▄▃▄▄▄▄▃▄▄▄▃▂▂▂▂▂▂▁▁▁▁▁▂▂▂▂▂▂▂▂
Loss/KL_divergence,▁▇▇█▇▇▇▇▇▇▇█▇█▇▇██▇▇▇██▇▇▇▇██▇██▇▇▇█▇▇▇▇
Loss/Policy_loss,▅▇▃█▅▆▇▁▅▄▃▅▆▅▇▆▅▄▇▂▆▇█▄▄▅▅▅▆█▄▇▅▄▆▄▄▇▆▅
Loss/Regularized_Actor_loss,▅▇▃█▅▆▇▁▅▄▃▅▆▅▆▅▅▃▇▂▅▇█▄▃▅▅▅▅▇▃▆▄▃▅▃▃▆▅▄

0,1
Duration/Mean_train_ep_duration,45.71429
Duration/Mean_val_ep_duration,39.0
Learning_rate/Actor,0.00078
Learning_rate/Critic,0.00011
Loss/Actor_loss,-0.0079
Loss/Critic_loss,2.70417
Loss/Entropy_bonus,0.73057
Loss/KL_divergence,0.01585
Loss/Policy_loss,-0.00275
Loss/Regularized_Actor_loss,0.00608


[34m[1mwandb[0m: Agent Starting Run: e1sqf2su with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0013669635726713542
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004933848267949176
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03997901933869671
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9055316744558706
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 150, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00014926236462563936
[34m[1mwandb[0m: 	l2_factor: 0.0001065560773851522
[34m[1mwandb[0m: 	lr

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0013669635726713542, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004933848267949176, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03997901933869671, 'epochs': 10, 'exponential_factor': 0.9055316744558706, 'gamma': 0.9, 'hidden_sizes': [150, 150, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.00014926236462563936, 'l2_factor': 0.0001065560773851522, 'lrelu': 0.01, 'minibatch_size': 32, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 150, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.001366963572671354

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▃▄▄▃▃▃▄▄▄▅█▅▄▃▅▆▆▅▃▄▅▄▅▅▄▅▆▅▅▇▃▇▅▅▇▃▂▆
Duration/Mean_val_ep_duration,▁▂▅▃▄▅▃▅▃▄▄▄▆▆▃█▆▆▆▄▆▅▅▅▄▆▅▆▆▅▆▅▄▅▄▄▆▇▅▆
Learning_rate/Actor,█▇▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃▂▁█▂▇▄▅▅▅▃▄▃▄▅▆▅▄▅▅▆▅▄▆▅▅▄▆▅▆▅▅▄▅▅▅▅▅▅▅
Loss/Critic_loss,█▄▆▂▃▂▃▂▁▂▃▂▂▃▂▃▁▁▄▃▂▅▃▂▃▁▂▃▂▃▁▂▂▂▂▂▂▃▂▃
Loss/Entropy_bonus,█▆▅▄▄▅▄▃▃▂▃▃▃▃▃▂▃▃▂▂▂▂▂▁▃▂▂▂▂▂▂▃▂▂▂▁▃▁▂▂
Loss/KL_divergence,▆▆▆▆▁▆▃▆▆▇▄▆▅█▃▆▃▄▆▅▄▆▆▄▅▆▄▃▅▄▆▅▄▄▄▄▃▄▃▃
Loss/Policy_loss,▃▂▁█▁▇▄▅▄▄▃▃▃▄▅▆▅▄▅▅▆▅▄▆▅▅▄▅▅▅▅▅▄▅▅▄▄▅▅▄
Loss/Regularized_Actor_loss,█▅▃▄▂▃▂▂▂▂▁▁▁▁▁▂▁▁▁▁▂▁▁▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,71.0
Duration/Mean_val_ep_duration,69.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.06994
Loss/Critic_loss,5.43052
Loss/Entropy_bonus,1.34777
Loss/KL_divergence,-0.00863
Loss/Policy_loss,-0.01605
Loss/Regularized_Actor_loss,0.10263


[34m[1mwandb[0m: Agent Starting Run: 0f5p099f with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0015898729814662598
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0009282871046169554
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.01804968291806467
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8607210863126157
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 250, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0006147232514912152
[34m[1mwandb[0m: 	l2_factor: 0.00028731083369277317
[34m[1mwandb[0m: 	lr

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.0015898729814662598, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0009282871046169554, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.01804968291806467, 'epochs': 10, 'exponential_factor': 0.8607210863126157, 'gamma': 0.9, 'hidden_sizes': [250, 250, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0006147232514912152, 'l2_factor': 0.00028731083369277317, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 250, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.00158987298146

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▂▂▂▃▂▃▃▃▆▄▄▄▄▇▄▄▆▅▅▅▄█▆▅▄▅▄▅▅▅▅▆▅▆▅▅
Duration/Mean_val_ep_duration,▁▁▂▂▂▂▂▂▃▅▃▅▄▇█▄▆▅▄▅▆▆▅▅▆▅▄▆▆▅▆▆▄▆▆▆▇▆▅▇
Learning_rate/Actor,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▁▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Critic_loss,█▁▃▄▆▄▃▃▄▄▆▅▃▃▄▃▃▃▄▃▂▂▄▃▃▂▂▂▃▂▃▃▃▃▃▄▂▁▁▃
Loss/Entropy_bonus,▁▃▅▅▆▅▆▅▆▆▆▇█▅▆▇▆▇▆▇▇▇▇▇▆▆▇▆▆▇▆▇▇▆▇▇▆▆▆▇
Loss/KL_divergence,█▁▃▂▂▃▂▂▃▃▃▃▃▃▂▃▃▃▃▃▃▃▃▃▂▃▂▃▂▃▃▂▃▃▂▃▃▃▃▃
Loss/Policy_loss,█▁▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Regularized_Actor_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,67.63636
Duration/Mean_val_ep_duration,71.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.0192
Loss/Critic_loss,8.306
Loss/Entropy_bonus,1.06953
Loss/KL_divergence,0.00101
Loss/Policy_loss,0.0385
Loss/Regularized_Actor_loss,0.06724


[34m[1mwandb[0m: Agent Starting Run: 7ztje6vd with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.007808937764403372
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.004725791374954232
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03229711593663933
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8502460911098485
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.826547052415299e-06
[34m[1mwandb[0m: 	l2_factor: 0.0009231804665215436
[34m[1mwandb[0m: 	lrelu: 0.1


Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.007808937764403372, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.004725791374954232, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03229711593663933, 'epochs': 10, 'exponential_factor': 0.8502460911098485, 'gamma': 0.99, 'hidden_sizes': [350, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.826547052415299e-06, 'l2_factor': 0.0009231804665215436, 'lrelu': 0.1, 'minibatch_size': 64, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.1, 'bn': False, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.007808937764403372, 'adv_std': True,

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▁▄▄▄▆▆▆▇█▃▅▅▅▆▆▇▇▅█▂▇▇▆▇▇▇▇▇▆▇▇▅▅▆▆█▇▅
Duration/Mean_val_ep_duration,▂▁▅▄▅▆▄▄▅██▆▂▃▅▄▆▅▆▅▆▆▇▇▅▆▇▅▆▆▇▆▄█▆▆▆█▇▆
Learning_rate/Actor,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃█▆▃▂▄▃▁▁▁▂▃▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Loss/Critic_loss,█▇▇▅▅▆▃▃▁▂▁▃▂▁▁▂▂▂▂▂▂▂▃▁▂▂▂▂▂▂▂▂▁▂▂▁▂▂▂▅
Loss/Entropy_bonus,█▆▆▅▃▃▃▃▄▅▅▃▁▁▂▂▃▃▄▄▄▃▅▄▄▄▄▅▄▄▄▄▄▃▄▄▃▄▃▄
Loss/KL_divergence,▅▇█▁▄▆▁▂▁▄▃▆▅▆▁▂▄▃▃▃▃▄▄▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Loss/Policy_loss,▄█▆▃▂▄▃▁▁▁▂▃▃▂▂▂▃▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Loss/Regularized_Actor_loss,▇█▄▂▂▃▂▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,63.5
Duration/Mean_val_ep_duration,77.0
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.04641
Loss/Critic_loss,11.0628
Loss/Entropy_bonus,1.43643
Loss/KL_divergence,0.0
Loss/Policy_loss,-1e-05
Loss/Regularized_Actor_loss,-0.02442


[34m[1mwandb[0m: Agent Starting Run: 19sck1eo with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.005737003962212396
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00022277023218567415
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.022346115711028677
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9432818055899356
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 350, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 8.914319128762696e-05
[34m[1mwandb[0m: 	l2_factor: 2.6850570155198736e-06
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.005737003962212396, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00022277023218567415, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.022346115711028677, 'epochs': 10, 'exponential_factor': 0.9432818055899356, 'gamma': 0.9, 'hidden_sizes': [150, 250, 350, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 8.914319128762696e-05, 'l2_factor': 2.6850570155198736e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 350, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▅▇▃▆██▇▅▇▇▇▆▇▆▆▆▆▆▆▆▄▆▇▆█▆▆▆▇▇▇▇▇▆▇▇▇▇▇
Duration/Mean_val_ep_duration,▁▃▃▃▆▆▆█▅▅▄▅▅▄▄▄▃▄▄▄▅▄▅▅▄▄▄▅▅▅▅▅▄▅▅▅▅▅▅▅
Learning_rate/Actor,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▆▂▄▁▄▂▂▃▃▅▂▇▄▆▃▂▃▄▄▆▄▅█▃▄▃▅▅▅▄▅▄▅▃▃▄▄▃▆
Loss/Critic_loss,█▄▃▃▃▃▃▃▂▁▂▁▁▂▂▁▂▂▂▂▃▂▂▂▁▂▂▂▁▂▂▂▁▂▂▂▂▁▂▂
Loss/Entropy_bonus,▇▄██▇▅▆▇▄▃▃▃▃▃▂▂▁▂▂▂▁▂▁▁▁▁▁▂▁▁▁▂▂▂▂▂▂▂▂▂
Loss/KL_divergence,▄█▄▆▄▇▄▁▆█▇▄▇█▆▆▅▂█▅▃▆▇▅▆▇▃▇▂▇▂▇▃▇▄▃▅▄▃█
Loss/Policy_loss,▁▆▂▅▁▄▂▂▃▃▅▂▇▄▆▃▂▃▄▄▅▄▅█▂▄▃▅▅▄▄▅▄▅▃▃▄▃▃▅
Loss/Regularized_Actor_loss,▇█▃▄▃▄▂▂▂▂▃▁▄▂▃▂▁▁▂▂▂▂▂▃▁▂▁▂▂▂▂▂▁▂▁▁▂▁▁▂

0,1
Duration/Mean_train_ep_duration,61.66667
Duration/Mean_val_ep_duration,58.2
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.03925
Loss/Critic_loss,6.20676
Loss/Entropy_bonus,1.0645
Loss/KL_divergence,0.03507
Loss/Policy_loss,0.06303
Loss/Regularized_Actor_loss,0.20528


[34m[1mwandb[0m: Agent Starting Run: xy65lks2 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.00015221838222414085
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.003696215347491537
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04188146959753264
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.903731918460126
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0008005852231136583
[34m[1mwandb[0m: 	l2_factor: 7.222725348677951e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.00015221838222414085, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.003696215347491537, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04188146959753264, 'epochs': 10, 'exponential_factor': 0.903731918460126, 'gamma': 0.9, 'hidden_sizes': [350, 350, 150, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.0008005852231136583, 'l2_factor': 7.222725348677951e-05, 'lrelu': 0.01, 'minibatch_size': 32, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.8, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.0001522183

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▃▃▂▃▃▆▃▃▆▄▂▂▃▄█▃▄▁▄▄▄▄▂▄▃▇▂▃▃▁▂▇▂▅▇▄▃
Duration/Mean_val_ep_duration,▁▁▁▂▃▃▃▃▄▄█▆▆▇▅▃█▄▅█▆▅▅▄▃▄▄▄▃▄▅▅▄▄▄▄▅▄▂▄
Learning_rate/Actor,█▇▇▆▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▇▆▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▁▃▂▅▂▃▂▃▂▂▂▃▁▂▁▇▄█▃▂▄▅▃▂▄▃▆▄▂▄▃▂▃▅▃▅▆▂▃
Loss/Critic_loss,█▆▃▂▁▂▁▂▃▃▁▁▂▂▃▂▁▁▁▁▂▂▂▁▁▁▁▁▁▃▂▂▃▁▁▁▂▁▂▁
Loss/Entropy_bonus,█▇▇▇▅▅▅▆▃▄▄▄▆▃▅▅▅▄▄▃▄▃▁▅▄▄▃▅▅▃▃▂▄▂▂▃▂▂▂▁
Loss/KL_divergence,▄▂▁▂▄▇▂▄▄▃▅▅▄▃▂▂▄▃▁▃▄▁▁▁▁▂▃▆▂▃▄▄▄▅▂▂▃▃█▃
Loss/Policy_loss,▃▁▃▂▅▂▃▂▃▂▂▂▃▁▂▁▇▄█▃▂▄▅▃▂▄▃▇▅▂▄▃▂▃▅▃▅▆▂▃
Loss/Regularized_Actor_loss,█▆▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,99.0
Duration/Mean_val_ep_duration,137.5
Learning_rate/Actor,0.0
Learning_rate/Critic,1e-05
Loss/Actor_loss,-0.09162
Loss/Critic_loss,1.8681
Loss/Entropy_bonus,1.64893
Loss/KL_divergence,0.00019
Loss/Policy_loss,-0.02256
Loss/Regularized_Actor_loss,1.51604


[34m[1mwandb[0m: Agent Starting Run: cmu6tnwn with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.00015893232929343475
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.004771544674128819
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.029901548193467596
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9394569340757029
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 4.682310327648073e-05
[34m[1mwandb[0m: 	l2_factor: 8.326398290580943e-05
[34m[1mwandb[0m: 	lre

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.00015893232929343475, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.004771544674128819, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.029901548193467596, 'epochs': 10, 'exponential_factor': 0.9394569340757029, 'gamma': 0.9, 'hidden_sizes': [350, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 4.682310327648073e-05, 'l2_factor': 8.326398290580943e-05, 'lrelu': 0.001, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.001, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0001589323292934

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▃▅▆▇▄▅▅█▇▅▅▆▆▆▇▇▅▅▅▅▅▆▅▆▆▅▅▅▆▅▅▅▅▆▅▆
Duration/Mean_val_ep_duration,▁▁▄▅▆▅▇▆▄▆▇█▄▆▆▅▇█▆▅▇▄▅▄▆▅▆▄▅▄▅▄▆▆▆▄▃▅▅▄
Learning_rate/Actor,██▇▆▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▂▁▄▂▂▂▂▄▁▁▃▆▄▄▃▅▅▃▃▄▄▂▃▃▄▁▄▄▅▃▄▃▃▂▅▂▇▄█
Loss/Critic_loss,█▄▄▃▅▅▄▅▅▂▂▁▁▄▁▂▂▄▁▁▂▂▃▃▂▂▂▁▁▁▃▂▃▂▂▂▁▃▁▂
Loss/Entropy_bonus,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▁▂▂▂▁▃▂▁▂▁▂▁▂
Loss/KL_divergence,▂▃▂▃▃▂▂▃▂▃▄▁▂▁▅▄▃▁▂▂▂▃▂▄▁▂▄▇▁▁▁▃▃▃█▂▃▁▃▅
Loss/Policy_loss,▁▂▂▄▂▂▂▂▄▁▁▃▆▄▃▃▅▅▃▃▄▄▂▃▃▃▁▃▄▅▃▄▃▃▂▄▂▇▄█
Loss/Regularized_Actor_loss,█▇▅▇▄▄▄▃▅▂▂▃▆▅▄▄▆▅▃▃▄▄▂▃▃▃▁▃▄▅▃▄▃▃▂▄▂▇▄█

0,1
Duration/Mean_train_ep_duration,132.0
Duration/Mean_val_ep_duration,91.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.00011
Loss/Actor_loss,0.08183
Loss/Critic_loss,2.43138
Loss/Entropy_bonus,1.36039
Loss/KL_divergence,0.03014
Loss/Policy_loss,0.12251
Loss/Regularized_Actor_loss,0.26896


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a0mi4xva with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0003711888311974464
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.002052301984690872
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03369282791864424
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.989941847911452
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.491361550929

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.0003711888311974464, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.002052301984690872, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03369282791864424, 'epochs': 10, 'exponential_factor': 0.989941847911452, 'gamma': 0.99, 'hidden_sizes': [350, 150, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 2.4913615509297983e-05, 'l2_factor': 2.8748087676914505e-06, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.000371188831

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▃▄▅▆▄▅▆▄▄▄▅▃▄▅▇▅▇▄▆▄▆▆▇▅▅▃▆█▄▅▆▄▅▄▅▃▄▅
Duration/Mean_val_ep_duration,▂▂▁▄▃▃▂▄▅▇▁▁▂▅▃▅▅▄▆█▅▄▄▅▂▄▃▅▆▄▅▂▆▅▅▆▁▂▄▆
Learning_rate/Actor,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
Learning_rate/Critic,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
Loss/Actor_loss,█▆▃▄▃▃▃▃▂▂▂▃▂▃▂▂▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂
Loss/Critic_loss,█▆▇▂▃▂▃▃▂▂▂▃▂▄▂▁▂▂▁▃▂▂▂▁▂▂▂▃▂▂▃▂▁▁▂▃▂▂▃▂
Loss/Entropy_bonus,▁▁▄▅▄▇▅▅█▇▇▇▇▇▇█▆▆▆▇▆▇█▇▇▇▇▇▇█▇███▇▇▇▇▇▇
Loss/KL_divergence,█▅▄▄▅▁▄▄▁▃▅▄▄▂▄▃▃▄▄▄▄▃▄▄▂▄▄▄▃▂▄▃▄▄▃▄▄▄▄▄
Loss/Policy_loss,█▆▃▄▃▃▃▃▂▂▂▃▂▃▂▂▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂
Loss/Regularized_Actor_loss,█▆▃▄▃▃▃▃▂▂▂▃▂▃▂▂▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂

0,1
Duration/Mean_train_ep_duration,43.45454
Duration/Mean_val_ep_duration,54.7
Learning_rate/Actor,0.00021
Learning_rate/Critic,0.00114
Loss/Actor_loss,18.36545
Loss/Critic_loss,11.61751
Loss/Entropy_bonus,1.6941
Loss/KL_divergence,0.01491
Loss/Policy_loss,18.42253
Loss/Regularized_Actor_loss,18.52979


[34m[1mwandb[0m: Agent Starting Run: zcwd3cwx with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0020391379396020315
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0001493701248538289
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.02264339278667309
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.93826319032749
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0002291442674219355
[34m[1mwandb[0m: 	l2_factor: 0.0004255506725380319
[34m[1mwandb[0m: 	lre

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'lrelu', 'actor_lr': 0.0020391379396020315, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0001493701248538289, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.02264339278667309, 'epochs': 10, 'exponential_factor': 0.93826319032749, 'gamma': 0.95, 'hidden_sizes': [350, 350, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.0002291442674219355, 'l2_factor': 0.0004255506725380319, 'lrelu': 0.1, 'minibatch_size': 128, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.00203913793960203

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▄▄▄▅▄▅▄▇▇▆▆▅▅▄▄▆▆▆▅▅█▆▆▅▆▆▆▆▆▆▇▅▆▆▆▅▇▇
Duration/Mean_val_ep_duration,▁▂▃▄▅▃▄▄▅▅▆▄▅▆▆▅▅▆▅▅▅▆▅▅▅▅▅▅▅▇█▅▆▅▆▅█▇▆▆
Learning_rate/Actor,█▇▆▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▂▃▁▆▃▄▃▄▇▁▄▆▄▄▅▄▇▄▄▃▇▄▅▄▅▅▃▃▆▆▅▅▄▃▃▆▅▆▄
Loss/Critic_loss,▃▅█▇▇▅▆▄▆▂▃▃▂▅▃▄▄▃▂▃▂▂▁▁▂▂▁▂▃▂▂▃▂▃▁▂▄▂▂▂
Loss/Entropy_bonus,▁▆▇█▇█▇█▇▇▇▇▇▇▇▇▇▇███▇▇▇▇█▇██▇█▇█▇██████
Loss/KL_divergence,▅▃▅▅▃▃▂▅▆▂▇▄█▇██▇▁▃▄▆▆▆▅▆▇▅▅▄▅▇▅▆▄▄▄▅▅▆▇
Loss/Policy_loss,▅▁▃▁▇▄▄▃▃▇▁▄▆▄▄▅▄█▅▄▄█▅▅▄▅▆▃▄▆▆▆▆▄▄▃▇▆▇▄
Loss/Regularized_Actor_loss,█▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,94.25
Duration/Mean_val_ep_duration,81.5
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.03317
Loss/Critic_loss,8.78286
Loss/Entropy_bonus,1.31111
Loss/KL_divergence,0.00802
Loss/Policy_loss,-0.00349
Loss/Regularized_Actor_loss,-0.01711


[34m[1mwandb[0m: Agent Starting Run: k6x8s6dy with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0009946089387856578
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0013294521847346972
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.021713228241210053
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.930294471916696
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 350]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.1176874201725062e-05
[34m[1mwandb[0m: 	l2_factor: 7.711984147565895e-05
[34m[1mwandb[0m: 	lr

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'tanh', 'actor_lr': 0.0009946089387856578, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0013294521847346972, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.021713228241210053, 'epochs': 10, 'exponential_factor': 0.930294471916696, 'gamma': 0.9, 'hidden_sizes': [150, 350, 350], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 1.1176874201725062e-05, 'l2_factor': 7.711984147565895e-05, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 350], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.0009946089387856578

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▅▅▇██▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
Duration/Mean_val_ep_duration,▁▅█▇▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Learning_rate/Actor,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▂▆▄▂▁▅▂▂▃▂▄▄▃▂▃▁▄▂▂▂▂▃▁▃▂▄▂▃▁▁▂▄▂▂▃▃▅▂▂
Loss/Critic_loss,█▃▆▃▂▂▄▂▂▂▁▁▂▁▂▂▁▂▂▁▁▁▁▂▁▁▂▁▁▁▂▁▂▁▁▁▂▃▁▁
Loss/Entropy_bonus,█▇▇▆▆▅▃▃▂▂▂▂▂▂▂▂▁▂▁▁▁▂▂▁▁▁▂▁▂▁▁▂▂▁▁▂▂▂▂▂
Loss/KL_divergence,█▆▅▂▆▆▅▆▁█▁▃▁▂▂▂▂▂▁▂▂▂▃▂▂▂▃▂▁▂▂▂▃▁▂▃▂▃▂▂
Loss/Policy_loss,█▂▆▄▂▁▅▂▂▃▂▄▄▃▂▃▁▄▂▂▂▂▃▁▃▂▄▂▃▁▁▂▄▂▂▃▃▅▂▂
Loss/Regularized_Actor_loss,█▂▆▄▂▁▅▂▂▃▂▄▄▃▂▃▁▄▂▂▂▂▃▁▃▂▄▂▃▁▁▂▄▂▂▃▃▅▂▂

0,1
Duration/Mean_train_ep_duration,56.25
Duration/Mean_val_ep_duration,56.7
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,5.68506
Loss/Critic_loss,2.20964
Loss/Entropy_bonus,0.33642
Loss/KL_divergence,-0.01509
Loss/Policy_loss,5.69236
Loss/Regularized_Actor_loss,5.9912


[34m[1mwandb[0m: Agent Starting Run: y2vgdysr with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.00956831874476812
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0005531724485946037
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.034161780379417694
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8653163840188317
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.2828593297696874e-05
[34m[1mwandb[0m: 	l2_factor: 0.0005961001406206773
[34m[1mwandb[0m: 	lrelu: 0

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.00956831874476812, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0005531724485946037, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.034161780379417694, 'epochs': 10, 'exponential_factor': 0.8653163840188317, 'gamma': 0.99, 'hidden_sizes': [250, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 1.2828593297696874e-05, 'l2_factor': 0.0005961001406206773, 'lrelu': 0.001, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.9, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00956831874476812, 'adv_std

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▃▂▃▃▃▂█▄▄█▅██▄▅▅▃▄▃▄▄▄▄▄▃▅▅▅▄▂▅▅▅▅▃▅
Duration/Mean_val_ep_duration,▁▁▂▂▂▂▃▄▆▅▅▆▇█▆▇▅▆▇▅▅▃▃▄▄▃▃▂▃▄▃▃▃▄▄▄▄▄▂▄
Learning_rate/Actor,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▄▁▅▃▃▄▆▇▇▆▄█▅▆▄▅▄▆▆▅▄▆▅▆▆▄▄▆▇▅▅▆▆▇▅▆▇▆▅
Loss/Critic_loss,▄█▃▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Entropy_bonus,█▇█▇▇▇▆▅▄▂▂▃▁▁▂▂▄▃▄▄▄▄▄▃▂▂▃▃▃▂▃▂▂▂▂▂▂▂▂▂
Loss/KL_divergence,▆▆▃▆▄▃▆▆█▅▄▅▇▃▁▃▂▃▄▂▆▂▅▅▅▄▃▆▅▅▄▅▅▄▅▄▅▄▄▄
Loss/Policy_loss,█▃▁▅▃▃▄▅▇▆▅▄▇▄▅▃▄▄▅▅▅▃▅▅▅▆▃▄▅▆▄▅▅▅▆▄▆▆▆▄
Loss/Regularized_Actor_loss,█▆▂▄▂▁▂▂▃▃▂▂▃▂▂▁▂▁▂▂▂▁▂▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,121.0
Duration/Mean_val_ep_duration,105.9
Learning_rate/Actor,1e-05
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.06188
Loss/Critic_loss,13.08857
Loss/Entropy_bonus,1.12141
Loss/KL_divergence,-0.00319
Loss/Policy_loss,-0.02357
Loss/Regularized_Actor_loss,-0.03069


[34m[1mwandb[0m: Agent Starting Run: 3m894vuo with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.007134139754695279
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0007079128550560811
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.026892372310174346
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8909132937934793
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 250, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0003721921016927357
[34m[1mwandb[0m: 	l2_factor: 5.6259325338682934e-06
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.007134139754695279, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0007079128550560811, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.026892372310174346, 'epochs': 10, 'exponential_factor': 0.8909132937934793, 'gamma': 0.99, 'hidden_sizes': [250, 150, 250, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.0003721921016927357, 'l2_factor': 5.6259325338682934e-06, 'lrelu': 0.001, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 250, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.00

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▄▅▃▄▃▄▆▅▄▇▅▃▅▄▅▅▆▇▄▅▄█▃▄▄▆▄▄▄▄▇▅▄▆▇▆▃▄
Duration/Mean_val_ep_duration,▁▅▂▃▇▄▅▅▇▆▅▄▅▄█▅▇▅▆▇▇▅▆▅▆█▆▅▆▇▅▇▆▇▅▅▆▇▆▆
Learning_rate/Actor,█▇▅▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▃▂▃▃▄▂▃▁▁▁▁▂▂▂▂▂▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Critic_loss,█▅▄▂▂▃▄▂▂▁▃▁▁▂▁▂▃▁▁▂▁▂▂▂▃▃▁▃▂▂▂▂▁▃▂▂▁▂▃▂
Loss/Entropy_bonus,█▆▇▅▃▄▅▃▂▃▂▃▅▄▃▃▂▁▂▄▂▃▄▅▃▃▃▃▃▃▃▃▄▂▃▃▃▃▃▃
Loss/KL_divergence,▆█▄▄▆▆▃▅▃▁▆▃▂▄▅▄▆▃▄▂▄▃▃▄▄▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Loss/Policy_loss,█▃▂▃▃▄▂▂▁▁▁▁▂▂▂▂▂▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Loss/Regularized_Actor_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,128.0
Duration/Mean_val_ep_duration,143.5
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.04046
Loss/Critic_loss,6.56448
Loss/Entropy_bonus,1.49913
Loss/KL_divergence,6e-05
Loss/Policy_loss,-0.00014
Loss/Regularized_Actor_loss,-0.01166


[34m[1mwandb[0m: Agent Starting Run: c4efcst2 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.000257865439738344
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0011216788406097128
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.002441431645634638
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9329502280167148
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 250, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00028722056835260015
[34m[1mwandb[0m: 	l2_factor: 2.859095592652245e-06
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.000257865439738344, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0011216788406097128, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.002441431645634638, 'epochs': 10, 'exponential_factor': 0.9329502280167148, 'gamma': 0.95, 'hidden_sizes': [250, 250, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 0.00028722056835260015, 'l2_factor': 2.859095592652245e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 250, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.8, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.000257865439

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▁▄█▄▅▆▆▆▅█▅▅▅▄▅▅▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Duration/Mean_val_ep_duration,▁▁▂▇▆▄▆▅▆▅█▄▄▄▄▅▅▅▅▅▄▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Learning_rate/Actor,██▇▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆▅▄▃▄▆▄▃▄▃▃▅▆▃▄▃▂▄▂▄▅▁▅▂▂▂▃▂▂▅▂█▄▂▃▃▂▆▄▃
Loss/Critic_loss,█▅▃▃▂▃▃▂▃▂▃▃▂▂▂▂▁▁▁▁▁▃▃▂▁▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂
Loss/Entropy_bonus,▇█████▇▇▇▆▇▆▆▆▅▆▅▄▄▃▂▂▂▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▂▁
Loss/KL_divergence,█▇▅▆▇▆▆▅▆▆▆▇▆▆▅▅▇▇█▆▄▁▄▂▂▃▂▃▃▃▂▃▄▂▂▃▂▃▃▃
Loss/Policy_loss,▆▆▄▄▄▆▄▃▅▃▃▅▆▃▄▃▂▄▂▄▅▁▅▂▂▂▃▂▂▅▂█▄▂▃▃▂▆▄▃
Loss/Regularized_Actor_loss,██▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,47.85
Duration/Mean_val_ep_duration,48.5
Learning_rate/Actor,0.0
Learning_rate/Critic,2e-05
Loss/Actor_loss,-0.00895
Loss/Critic_loss,6.23004
Loss/Entropy_bonus,0.29918
Loss/KL_divergence,-0.01732
Loss/Policy_loss,-0.00822
Loss/Regularized_Actor_loss,0.78533


[34m[1mwandb[0m: Agent Starting Run: 6v7fhosi with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00015634875099178417
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0011795897106670475
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.029973643646906637
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.998037235633636
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 350, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0007642350770447983
[34m[1mwandb[0m: 	l2_factor: 1.849752049952352e-06
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.00015634875099178417, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0011795897106670475, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.029973643646906637, 'epochs': 10, 'exponential_factor': 0.998037235633636, 'gamma': 0.9, 'hidden_sizes': [250, 150, 350, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0007642350770447983, 'l2_factor': 1.849752049952352e-06, 'lrelu': 0.01, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 350, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.000156348750

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▃▃▅▆▄▃▅▇▄▄█▄▇▅▅▅▆▆▄▆▆▄▄▅█▆▆▅▇▅▄▅▅▇▆▅▄
Duration/Mean_val_ep_duration,▁▂▂▄▃▁▃▆▆▇▅▇█▇▇▇▆▄▇▆▄▅▅▅▆▃▅▅▅▅▆▄▅▆▆▆▇▇▅▇
Learning_rate/Actor,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
Learning_rate/Critic,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
Loss/Actor_loss,▁▃▃▄▁▃▂▃▆▄▄█▆▄▃▃▅▄▇▃▃▂▃█▂▃▂▆▂▆▂▅▃▃▂▂▂▂▂▃
Loss/Critic_loss,█▇▄▄▂▂▂▂▂▂▂▂▁▂▂▂▂▁▂▂▁▃▂▁▂▃▂▁▂▁▁▂▂▁▁▃▂▂▃▂
Loss/Entropy_bonus,█▇▆▅▅▆▅▅▄▅▄▃▃▂▃▂▂▂▁▂▂▁▄▃▃▄▂▄▄▄▄▄▄▃▃▄▅▄▃▂
Loss/KL_divergence,▆▇▅▇▅▅▅▃▇█▄▇▇▃▄▄▇▆▄▇▄▅▇▇▇▄▂▇▂▇▄█▇▄▃▄▃▁▁▁
Loss/Policy_loss,▁▃▃▄▁▃▂▃▆▄▄█▆▄▃▃▅▄▆▂▃▂▃█▂▃▂▆▂▆▂▅▃▃▂▁▂▂▂▃
Loss/Regularized_Actor_loss,█▇▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,59.76471
Duration/Mean_val_ep_duration,105.1
Learning_rate/Actor,0.00014
Learning_rate/Critic,0.00107
Loss/Actor_loss,-0.08265
Loss/Critic_loss,2.84667
Loss/Entropy_bonus,1.61494
Loss/KL_divergence,-0.0142
Loss/Policy_loss,-0.03424
Loss/Regularized_Actor_loss,0.29624


[34m[1mwandb[0m: Agent Starting Run: fec6gnix with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.007777993899669044
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00017829699216775452
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.013988089568541102
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9945009684366972
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 250, 150, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 9.659596095862054e-05
[34m[1mwandb[0m: 	l2_factor: 6.486958398522311e-05
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.007777993899669044, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00017829699216775452, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.013988089568541102, 'epochs': 10, 'exponential_factor': 0.9945009684366972, 'gamma': 0.95, 'hidden_sizes': [150, 250, 150, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 9.659596095862054e-05, 'l2_factor': 6.486958398522311e-05, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 250, 150, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.00777799

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▇█▇▅▆█▆▆▆▆▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▃▃▃▂▁▁▁▂▁▂
Duration/Mean_val_ep_duration,▄▅▅▆▅▅▄▆▅█▅▅▅▅▅▅▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▂▁▂
Learning_rate/Actor,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
Learning_rate/Critic,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
Loss/Actor_loss,▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▆▆▅▆▃▂▂▂▂▂▂▁▂▁▁▂▂▂▃█▆▆▃▄
Loss/Critic_loss,▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇▆▅█▃▁▂▁▂▁▁▁▁▁▁▂▁▁▂███▂▃
Loss/Entropy_bonus,█▇▅▅▃▃▃▂▂▂▂▁▂▂▂▂▁▁▁▂▁▁▂▂▁▁▁▁▁▁▁▂▁▁▂▂▁▁▂▁
Loss/KL_divergence,▅▆▅▅▇▇▆▅▇▅▆▆▆▅▄▆▄▆▅▅▄▆▅▆█▅▆▆▆▆█▄▇▆▁▇▄▄▃▅
Loss/Policy_loss,▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▆▆▅▆▃▂▂▂▂▂▂▁▂▁▁▂▂▂▃█▆▆▃▄
Loss/Regularized_Actor_loss,▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▆▆▅▆▃▂▂▂▂▂▂▁▂▁▁▂▂▂▃█▆▆▃▄

0,1
Duration/Mean_train_ep_duration,25.33333
Duration/Mean_val_ep_duration,26.0
Learning_rate/Actor,0.0061
Learning_rate/Critic,0.00014
Loss/Actor_loss,40.51084
Loss/Critic_loss,28.68944
Loss/Entropy_bonus,0.00547
Loss/KL_divergence,0.01057
Loss/Policy_loss,40.51092
Loss/Regularized_Actor_loss,42.93804


[34m[1mwandb[0m: Agent Starting Run: mfc25q2f with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 768
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0029187950737004317
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00031332809876424383
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.026731588806783772
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9584592583804542
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 5.54582712004736e-06
[34m[1mwandb[0m: 	l2_factor: 1.0015565442953912e-05
[34m[1mwandb[0m: 	l

Config del trial
{'GAE_lambda': 0.95, 'T': 768, 'activation': 'tanh', 'actor_lr': 0.0029187950737004317, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00031332809876424383, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.026731588806783772, 'epochs': 10, 'exponential_factor': 0.9584592583804542, 'gamma': 0.95, 'hidden_sizes': [150, 350, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 5.54582712004736e-06, 'l2_factor': 1.0015565442953912e-05, 'lrelu': 0.01, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 250], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'tanh', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 768, 'actor_lr': 0.00291879507370043

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▂▂▂▂▂▂▃▃▄▄▂▂▃▂▃█▄▄▅▄▄▃▄▃▇▅▄▇▃██▃██▅▅
Duration/Mean_val_ep_duration,▁▂▃▂▂▂▂▂▃▃▃▄▅▄▃▄▆▆▆▆▇▄▆▇█▇▆█▇▇▆▇▇█▇█▆▆▇▅
Learning_rate/Actor,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▄▃▆▆▃▅▅▅▅▅▆▅▆▆▆▇▇▇▅▆▆▆▆▇▆▇▇▇▇▇▇▆▇▇▇▇█▆█
Loss/Critic_loss,█▅▄▂▂▂▂▂▃▁▁▂▃▁▅▃▁▂▂▁▃▂▃▂▄▂▃▂▂▄▃▃▁▁▂▂▂▃▃▁
Loss/Entropy_bonus,█▇▅▄▁▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▁▁▁▂▂▁▁▂▂▁▁▁▂▂▁▁▁▂▂
Loss/KL_divergence,▅▅▆█▆▁▄▄▄▄▃▄▄▃▃▃▂▃▄▄▃▄▅▃▃▃▃▃▄▃▄▄▃▃▃▄▅▃▃▄
Loss/Policy_loss,▁▅▂▆▅▁▄▄▄▄▄▅▄▅▅▆▆▇▇▄▆▆▅▆▆▅▆▆▆▇▆▇▆▇▇▆▆█▆█
Loss/Regularized_Actor_loss,▁▄▂▇▆▃▅▅▅▅▅▅▄▆▆▆▆▇▆▄▆▆▆▆▆▅▆▆▆▇▆▇▆▇▇▆▆█▆█

0,1
Duration/Mean_train_ep_duration,240.5
Duration/Mean_val_ep_duration,191.10001
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.00794
Loss/Critic_loss,3.49813
Loss/Entropy_bonus,0.48926
Loss/KL_divergence,-0.00122
Loss/Policy_loss,0.02102
Loss/Regularized_Actor_loss,0.04165


[34m[1mwandb[0m: Agent Starting Run: jn6jm2zv with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0013464344254923513
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0007630369473629689
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.033396804024958156
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8892942856033342
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 250, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.171552141796959e-05
[34m[1mwandb[0m: 	l2_factor: 0.0005857793816828786
[34m[1mwandb[0

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0013464344254923513, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0007630369473629689, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.033396804024958156, 'epochs': 10, 'exponential_factor': 0.8892942856033342, 'gamma': 0.9, 'hidden_sizes': [350, 350, 250, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 3.171552141796959e-05, 'l2_factor': 0.0005857793816828786, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 250, 250], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0013

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▃▃▂▂▅▇▅▃▅▃▃▃▃▄▅▅▇▇▇▆▇▇▆▅▆▇▇▆▆▇██▆█▆▆█▇
Duration/Mean_val_ep_duration,▂▅▁▁▁▂▇▇▃▃▃▂▃▃▃▄▅█▇▇▇█▇▆█▆▆▆▇▇█▇▇▆▇▆▇▇▆█
Learning_rate/Actor,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆█▆▇▇▇▄▂▃▅▃▆▆▆▆▄▃▃▂▂▂▃▂▂▂▂▂▂▂▃▂▂▁▁▂▂▂▂▂▂
Loss/Critic_loss,▆█▄▅▄▂▂▁▂▄▃▄▃▃▂▃▂▂▂▂▂▂▁▁▂▂▂▁▁▂▁▁▁▁▁▁▂▁▂▁
Loss/Entropy_bonus,█▄▇▃▁▂▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Loss/KL_divergence,█▅▄▄▄▁▇▄▄▄▃▅▄▄▄▄▃▄▄▃▄▄▄▄▃▅▃▄▄▄▄▄▄▄▄▅▄▃▄▃
Loss/Policy_loss,▆█▆▇▇▇▄▂▃▅▃▆▆▆▆▄▃▃▂▂▂▃▂▂▂▂▂▂▂▃▂▂▁▁▂▂▂▂▂▂
Loss/Regularized_Actor_loss,▆█▆▇▇▇▄▂▃▅▃▆▆▆▆▄▃▃▂▂▂▃▂▂▂▂▂▂▂▃▂▂▁▁▂▂▂▂▂▂

0,1
Duration/Mean_train_ep_duration,47.0
Duration/Mean_val_ep_duration,50.1
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,10.98855
Loss/Critic_loss,13.63362
Loss/Entropy_bonus,0.48354
Loss/KL_divergence,0.03609
Loss/Policy_loss,11.00469
Loss/Regularized_Actor_loss,12.55517


[34m[1mwandb[0m: Agent Starting Run: sd1pl88m with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.006654488259229976
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0015656912336263088
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04298588313278487
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9497457958975826
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150, 150]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0008160435316339294
[34m[1mwandb[0m: 	l2_factor: 1.9065888959087276e-06
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.006654488259229976, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0015656912336263088, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04298588313278487, 'epochs': 10, 'exponential_factor': 0.9497457958975826, 'gamma': 0.99, 'hidden_sizes': [350, 150, 150, 150], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0008160435316339294, 'l2_factor': 1.9065888959087276e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.8, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0066

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▁▁▁▂▂▂▂▃▃▂▂▃▃▃▃▃▃▃▃▃▃▄█▄▅███▆▇█▇▄▅▇▇▆
Duration/Mean_val_ep_duration,▁▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▆▆██▆█▇▇▆▆▆▆▅▆▆
Learning_rate/Actor,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,████▆▃▄▄▃▃▂▂▃▃▂▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁
Loss/Critic_loss,▃▂▁▁▆█▄▃▃▄▁▁▂▂▁▁▂▃▂▁▂▃▂▃▃▃▂▃▂▁▂▂▃▂▂▂▅▁▂▂
Loss/Entropy_bonus,▁▁▁▁███▇▇▇▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▃▃▄▃▃▃▃▃▃▃▃▃
Loss/KL_divergence,▇▇▇▇▁▇▆▆▇▇▇█▆▇▇▇▇▇▆▇▇▇▇▇█▇▇▇▇▇▇▇▆▆▇▆▆▇▇▇
Loss/Policy_loss,████▆▃▄▄▃▃▂▂▃▃▂▃▂▂▂▂▂▂▃▂▂▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁
Loss/Regularized_Actor_loss,████▆▃▄▄▃▃▂▂▃▃▂▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁

0,1
Duration/Mean_train_ep_duration,141.0
Duration/Mean_val_ep_duration,255.2
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-18.8177
Loss/Critic_loss,7.25764
Loss/Entropy_bonus,0.53707
Loss/KL_divergence,-0.00504
Loss/Policy_loss,-18.79461
Loss/Regularized_Actor_loss,-18.23936


[34m[1mwandb[0m: Agent Starting Run: z550fq3j with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.009030073718482556
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0005080864438277403
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.036336246949039815
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.956328476161138
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0001086571790345529
[34m[1mwandb[0m: 	l2_factor: 7.981505172140598e-06
[34m[1mwandb[0m: 	lrel

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.009030073718482556, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0005080864438277403, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.036336246949039815, 'epochs': 10, 'exponential_factor': 0.956328476161138, 'gamma': 0.95, 'hidden_sizes': [350, 150, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 0.0001086571790345529, 'l2_factor': 7.981505172140598e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.03, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 250], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': True, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.00903007371848255

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▁▃▅▃▄▄▄▅▄▄▄▅▄▅▇▆▆▇█▇▅█▇▅▇▆▆▇▇▇▇▇█▇▆▆▇▇█
Duration/Mean_val_ep_duration,▂▁▄▄▅▄▄▄▄▅▄▅▅▆▆█▆▇██▇█▆▇█▆██▇████▇█████▆
Learning_rate/Actor,█▇▅▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃▆▂▂▃▅▁▄▄▅▃▂▃▄▄▄▃▆▆▆▅▄▄▅▅▅▇▄▆▇▅█▅▆▆▅▇▄▅▅
Loss/Critic_loss,▃█▃▁▂▁▁▁▁▁▁▁▁▁▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,▅▅▅██▆▆▄▆▅▆▆▅▅▃▂▄▄▄▄▃▃▃▃▂▂▃▁▂▃▃▄▂▃▃▃▄▄▂▃
Loss/KL_divergence,▁█▄▃▅▆▅▄▄▄▃▃▄▄▅▃▄▅▇▄▆▇▃▅▄▇▂▂▂▄▃▃▄▆▅▄▇▄▄▅
Loss/Policy_loss,▃▇▁▂▃▅▁▄▄▅▃▂▂▄▄▄▃▆▆▆▄▃▄▅▄▅▇▄▅▇▅█▅▆▆▅▇▄▄▅
Loss/Regularized_Actor_loss,█▇▂▂▂▃▁▃▂▃▂▁▂▂▂▂▂▃▃▃▂▂▂▃▃▃▃▂▃▄▂▄▃▃▃▃▄▂▂▃

0,1
Duration/Mean_train_ep_duration,137.5
Duration/Mean_val_ep_duration,93.9
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.04522
Loss/Critic_loss,3.74128
Loss/Entropy_bonus,1.17656
Loss/KL_divergence,0.00987
Loss/Policy_loss,-0.00247
Loss/Regularized_Actor_loss,-0.01559


[34m[1mwandb[0m: Agent Starting Run: 4hyl7x4n with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0016471526238193343
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0004726406677126958
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.028822420523710323
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9460267963755316
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 150, 150, 150]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.00014563171845030388
[34m[1mwandb[0m: 	l2_factor: 2.0378492374470233e-06
[34m[1mw

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0016471526238193343, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0004726406677126958, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.028822420523710323, 'epochs': 10, 'exponential_factor': 0.9460267963755316, 'gamma': 0.99, 'hidden_sizes': [350, 150, 150, 150], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.00014563171845030388, 'l2_factor': 2.0378492374470233e-06, 'lrelu': 0.001, 'minibatch_size': 64, 'momentum': 0.8, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150, 150, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.001, 'bn': False, 'momentum': 0.8, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_l

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▃█▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Duration/Mean_val_ep_duration,▂▂█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Actor,██▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
Loss/Actor_loss,▄▇▇▁█▆█▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇
Loss/Critic_loss,█▃▃▂▂▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Entropy_bonus,█▆▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/KL_divergence,█▅▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Policy_loss,▄▇▇▁█▆█▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇
Loss/Regularized_Actor_loss,▄▇▇▁█▆█▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇

0,1
Duration/Mean_train_ep_duration,18.0
Duration/Mean_val_ep_duration,18.0
Learning_rate/Actor,0.00028
Learning_rate/Critic,8e-05
Loss/Actor_loss,70.88287
Loss/Critic_loss,2.49327
Loss/Entropy_bonus,0.0
Loss/KL_divergence,0.0
Loss/Policy_loss,70.88287
Loss/Regularized_Actor_loss,71.57142


[34m[1mwandb[0m: Agent Starting Run: tgo2grkx with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0012148203300319407
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.007967818922842752
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03417704927241633
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9484429329144144
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.66160526562478e-05
[34m[1mwandb[0m: 	l2_factor: 5.444665603148154e-06
[34m[1mwandb[0m: 	

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.0012148203300319407, 'adv_std': False, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.007967818922842752, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03417704927241633, 'epochs': 10, 'exponential_factor': 0.9484429329144144, 'gamma': 0.99, 'hidden_sizes': [250, 350, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 3.66160526562478e-05, 'l2_factor': 5.444665603148154e-06, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 350], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.99, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0012148203300

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▁▁▁▁▁▁▁▂▂▂▃▄▃▅▄▄▃▄▄▄▄▄▄▄▄▅▆▆▆▆▄█▄▅▄▆▅▇
Duration/Mean_val_ep_duration,▁▁▁▁▁▁▁▁▁▂▂▂▄▄▄▄▄▄▄▄▄▄▄▄▅▅▆▆▅█▆▆▆█▆▆▆▅▇▇
Learning_rate/Actor,█▇▆▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,████▆▆▆▆▆▅▅▅▄▄▃▃▃▃▃▃▃▃▄▄▂▃▂▂▂▁▂▂▂▁▂▂▂▁▂▂
Loss/Critic_loss,▄▁▂▃▁▁▁▁▁▄▂▄▄▂▂▂▂▂▂▃▂▄▅▃▆▃▄▅▃█▆▆▅▅▅▄▂█▄▄
Loss/Entropy_bonus,▁▁▁▁▁▁▁▁▁████▇▆▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▅▃▄▄▄▃▄▄▄▄
Loss/KL_divergence,▄▄▄▄▄▄▄▄▄▄▁▄▃▆▄▅▇▄▃█▄▃▂▃▃▄▅▄▅▄▄▄▄▄▄▄▄▄▄▄
Loss/Policy_loss,████▆▆▆▆▆▅▅▅▄▄▃▃▃▃▃▃▃▃▄▄▂▃▂▂▂▁▂▂▂▁▂▂▂▁▂▂
Loss/Regularized_Actor_loss,████▆▆▆▆▆▅▅▅▄▄▃▃▃▃▃▃▃▃▄▄▂▃▂▂▂▁▂▂▂▁▂▂▂▁▂▂

0,1
Duration/Mean_train_ep_duration,289.0
Duration/Mean_val_ep_duration,244.89999
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-23.21521
Loss/Critic_loss,5.81314
Loss/Entropy_bonus,0.7188
Loss/KL_divergence,-0.00013
Loss/Policy_loss,-23.19064
Loss/Regularized_Actor_loss,-23.15919


[34m[1mwandb[0m: Agent Starting Run: z4dlj2pl with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.002613511253353624
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0014613079023404385
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.024549175124887367
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9100960325350222
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 250, 350, 250]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 8.809835747837358e-06
[34m[1mwandb[0m: 	l2_factor: 2.2090590696316883e-05
[34m[1mwandb[0m: 

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.002613511253353624, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0014613079023404385, 'decay_method': 'exponential', 'dropout_prob': 0, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.024549175124887367, 'epochs': 10, 'exponential_factor': 0.9100960325350222, 'gamma': 0.9, 'hidden_sizes': [250, 250, 350, 250], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 8.809835747837358e-06, 'l2_factor': 2.2090590696316883e-05, 'lrelu': 0.1, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 250, 350, 250], 'output_size': 6, 'dropout_prob': 0, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': True, 'momentum': 0.95, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.002613511253

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▄█▆▅▆▆▆▆▇▆█▆▆▆▇▇▇▅▇▆▆█▆▇█▇▇▆▆▆▇▆▆▇▆▆▅▆▅
Duration/Mean_val_ep_duration,▁▂▃▃▄▆▆▅▇██▇▆▇▅▅▆▅▄▄▇█▇▆▆▇▇▅▇▆▇▆▆▆▅▆▅▅▅▅
Learning_rate/Actor,█▇▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▃▃▁▁▁▆█▆▃▃▅▃▃▁▄▄▃▄▄▄▃▃▃▂▃▄▃▃▃▄▄▅▄▄▄▃▅▄▇▃
Loss/Critic_loss,▅█▄▂▂▂▂▂▁▁▂▁▂▁▂▁▁▁▂▁▁▂▁▁▁▁▁▂▂▁▁▁▁▁▂▁▂▂▁▂
Loss/Entropy_bonus,█▆▅▅▃▃▄▃▃▃▃▃▂▃▂▃▂▂▂▂▂▁▂▂▂▁▁▂▂▂▁▁▂▂▂▁▂▂▁▂
Loss/KL_divergence,▆▆▁▃█▇▆▆▇▆▆▅▁▃▅▆▆▆▅▅▅▁▄█▅▃▃▆▆▆▃▇▂▄▄▆▅▅▂▂
Loss/Policy_loss,▄▄▂▁▁▆█▆▃▃▅▂▂▁▄▃▃▃▃▄▂▂▂▂▃▃▃▃▂▄▄▄▄▄▄▃▄▄▆▃
Loss/Regularized_Actor_loss,██▃▂▂▆█▅▃▃▅▂▂▁▃▃▂▃▃▃▂▂▂▂▂▃▃▂▂▃▃▄▃▃▃▃▄▃▅▂

0,1
Duration/Mean_train_ep_duration,66.61539
Duration/Mean_val_ep_duration,73.5
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.044
Loss/Critic_loss,2.84472
Loss/Entropy_bonus,1.03125
Loss/KL_divergence,-0.00903
Loss/Policy_loss,-0.01868
Loss/Regularized_Actor_loss,0.01385


[34m[1mwandb[0m: Agent Starting Run: 0bdereef with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0005065810029301698
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00039428783418726034
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.019136652397261124
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8705528039868401
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 250, 250, 250]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 4.634103828323751e-06
[34m[1mwandb[0m: 	l2_factor: 7.16683875319009e-05
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0005065810029301698, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00039428783418726034, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.019136652397261124, 'epochs': 10, 'exponential_factor': 0.8705528039868401, 'gamma': 0.99, 'hidden_sizes': [350, 250, 250, 250], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 4.634103828323751e-06, 'l2_factor': 7.16683875319009e-05, 'lrelu': 0.01, 'minibatch_size': 32, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 250, 250, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.99, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.000

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▂▂▃▆▅▆▅▆▆▇█▇▇████▇▇▇▆▆▇▆▇▇▆▇▅▇▅▅█▇▇▇
Duration/Mean_val_ep_duration,▁▂▁▃▅▅▆▆▇█▇▇▇▇▇█▇█▇▇▇▇▇▇█▇▇█▇▇▇▇▇▇▇▇▇█▇▇
Learning_rate/Actor,█▆▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▃▁▁▅▅▂▃▄▂▂▄▄▃▃▂▃▆▅▅▄▃▄▄▇▃▆▅▃▄▃▄▂█▅▃▃▄▄▄
Loss/Critic_loss,▇█▂▂▃▂▂▅▁▂▂▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▄▁▁▂▁▁▁▁▂▃
Loss/Entropy_bonus,█▇▁▅▅▅▄▃▅▄▅▆▅▄▅▅▇▄▄▆▆▄▅▄▇▅▄▄▄▄▅▆▄▅▄▄▆▆▆▅
Loss/KL_divergence,██▂▁▅▆▅▅▅▅▅▅▅▆▅▄▄▆▅▅▅▃▄▅▅▃▆▅▄▅▅▃▃▆▅▃▄▅▄▅
Loss/Policy_loss,█▃▁▂▅▅▂▃▄▃▂▄▄▃▃▂▃▆▅▆▄▃▄▄▇▃▅▅▃▄▃▄▂█▅▃▃▄▄▄
Loss/Regularized_Actor_loss,█▃▁▁▄▄▁▃▃▂▂▄▃▂▂▁▂▅▄▅▃▂▃▄▆▂▅▄▂▃▃▄▁▇▅▃▂▃▃▃

0,1
Duration/Mean_train_ep_duration,168.0
Duration/Mean_val_ep_duration,170.2
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.03592
Loss/Critic_loss,12.17777
Loss/Entropy_bonus,0.61377
Loss/KL_divergence,0.03254
Loss/Policy_loss,0.04767
Loss/Regularized_Actor_loss,0.18601


[34m[1mwandb[0m: Agent Starting Run: ojzffjbi with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0054534714479357324
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0002850386939791313
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0297418762920168
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9723855316915349
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [250, 150, 150, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 0.0006998240912728084
[34m[1mwandb[0m: 	l2_factor: 7.304377440626881e-06
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.0054534714479357324, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.0002850386939791313, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0297418762920168, 'epochs': 10, 'exponential_factor': 0.9723855316915349, 'gamma': 0.95, 'hidden_sizes': [250, 150, 150, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 0.0006998240912728084, 'l2_factor': 7.304377440626881e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 150, 150, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)
[34m[1mwandb[0m: Network error (HTTPError), entering retry loop.


0,1
Duration/Mean_train_ep_duration,▁▁▁▂▂▂▃▃▄▄▅▃▃▂▄▃▃▃▄▃▄▅▃▃▄▃▅▃▄▆▅█▄▃▃▃▂▃▂▂
Duration/Mean_val_ep_duration,▁▁▁▁▃▂▃▃▃▃▃▃▃▃▃▃▄▄▃▅▃▃▂▂▃▄█▅▅▅▄▅▃▂▂▂▂▂▂▂
Learning_rate/Actor,██▇▆▆▅▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▆▆▅▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,█▁▃▂▂▃▃▃▂▂▂▃▂▃▃▃▄▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▃▄▃▄▃
Loss/Critic_loss,█▃▅▆▅▅▃▄▃▂▂▂▃▃▂▃▂▄▄▄▂▄▃▃▃▃▄▁▄▃▄▂▃▄▆▄▅▄▅▅
Loss/Entropy_bonus,▄▁▆▆▅▅▆▄▅▆▅▆▇▆▆▆▆▆▆▆▆▇▆▆▇▇▆█▇▇▇▇▇▇▇▆▆▇▆▆
Loss/KL_divergence,█▁▇▇▇▆▅▆▇▆▅▇▅▇▇▆▇▆▆▆▇▇▇▇▆▇█▆▇▇▇▇▇▇▇▇▆▇▇▇
Loss/Policy_loss,█▁▃▃▃▃▄▄▃▃▃▄▃▄▄▄▄▃▄▄▄▄▄▃▄▄▃▄▄▄▄▄▄▄▄▄▅▄▄▄
Loss/Regularized_Actor_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Duration/Mean_train_ep_duration,53.625
Duration/Mean_val_ep_duration,52.2
Learning_rate/Actor,0.0001
Learning_rate/Critic,1e-05
Loss/Actor_loss,-0.02219
Loss/Critic_loss,9.63276
Loss/Entropy_bonus,1.18623
Loss/KL_divergence,-0.00659
Loss/Policy_loss,0.01309
Loss/Regularized_Actor_loss,0.00094


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q76kj0op with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00016671551570304475
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00013174262551976423
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.029382194309433982
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9059728709191304
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [350, 350, 250, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.1

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'lrelu', 'actor_lr': 0.00016671551570304475, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00013174262551976423, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.029382194309433982, 'epochs': 10, 'exponential_factor': 0.9059728709191304, 'gamma': 0.99, 'hidden_sizes': [350, 350, 250, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 1.1177056547242217e-05, 'l2_factor': 8.132121737519819e-06, 'lrelu': 0.01, 'minibatch_size': 64, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 350, 250, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_l

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▃▄▄▄▆▇▅▇▇▇▆▅▆▇▆▅▇▆▆█▆▇▇▆▇▆▆▆▆▆▆▆██▇▇█▆
Duration/Mean_val_ep_duration,▁▂▃▅▄▇▆▆▆▇▇▇▇▇▇▆▆▅█▇▆▇▅▇▇▆▇▆▇▅█▅▅▆█▆▆▆█▆
Learning_rate/Actor,█▇▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▆▂▃▆▂▃▂▆▄▆▄▆▂▅▅█▆▅▂▃█▃▆▅▆▄▇▁▅▅▅▄▅▄▃▆▅▅▃
Loss/Critic_loss,█▃▃▃▄▃▃▂▃▂▂▂▂▃▂▁▂▂▁▂▁▂▂▂▂▂▂▂▂▂▂▄▂▂▂▁▂▁▁▂
Loss/Entropy_bonus,█▇▆▆▅▃▃▃▃▂▂▂▂▂▂▂▁▂▂▁▁▂▂▂▂▁▂▁▂▁▁▁▂▂▂▁▂▂▁▁
Loss/KL_divergence,▅▆▅▅█▄▆▅▂▇▇▆▅▅▆▅▅▆▇▆▆▄▅▅▇▄▄▅▁█▅▃▆▄▇▅▅▄▃▄
Loss/Policy_loss,▁▆▂▃▆▂▃▁▆▄▆▄▆▂▄▄█▅▅▂▃█▃▆▅▆▄▇▁▅▅▅▄▅▃▂▆▅▅▃
Loss/Regularized_Actor_loss,▃▇▂▃▆▂▃▁▆▄▆▄▆▂▅▄█▅▅▂▃█▃▆▅▆▄▇▁▅▅▅▄▅▃▂▅▅▅▃

0,1
Duration/Mean_train_ep_duration,120.0
Duration/Mean_val_ep_duration,121.3
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.05429
Loss/Critic_loss,5.07892
Loss/Entropy_bonus,1.37655
Loss/KL_divergence,-0.00329
Loss/Policy_loss,-0.01385
Loss/Regularized_Actor_loss,0.14353


[34m[1mwandb[0m: Agent Starting Run: y4zehi84 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.0001625436695896466
[34m[1mwandb[0m: 	adv_std: False
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00017691365326878136
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.04277513476487068
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8620759035516059
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [150, 350, 150, 350]
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 7.3055771312428e-05
[34m[1mwandb[0m: 	l2_factor: 3.529350425054039e-06
[34m[1mwandb[0m:

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.0001625436695896466, 'adv_std': False, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.00017691365326878136, 'decay_method': 'exponential', 'dropout_prob': 0.2, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.04277513476487068, 'epochs': 10, 'exponential_factor': 0.8620759035516059, 'gamma': 0.99, 'hidden_sizes': [150, 350, 150, 350], 'initialization': 'normal', 'input_size': 10, 'l1_factor': 7.3055771312428e-05, 'l2_factor': 3.529350425054039e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.99, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 350, 150, 350], 'output_size': 6, 'dropout_prob': 0.2, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.99, 'initialization': 'normal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0001625

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▁▂▂▄▃▃▄▄▃▃▅▄▂▂▆▃▅▃▃▄▆▃▃▃▂▃▄▆▅▅▃▄▄█▃▆▄▅▄
Duration/Mean_val_ep_duration,▁▂▄▃▅▄▇▆▇▇▅▄▆▃▃▇▅▆▇▆█▄▄▆▅▇▇▄▆█▅▇▇█▆▆▄▅▅▅
Learning_rate/Actor,█▇▅▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▅▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▆█▄▄▂▃▃▃▃▃▃▃▂▃▃▁▃▂▄▃▂▂▃▃▃▄▂▂▂▂▃▂▃▁▁▂▂▃▂▂
Loss/Critic_loss,▄█▄▄▂▂▃▃▂▂▂▂▂▃▃▁▃▂▄▃▂▂▃▃▃▃▂▂▂▂▃▁▂▁▁▂▁▂▁▂
Loss/Entropy_bonus,█▅▂▂▄▄▄▄▃▃▂▂▂▂▂▁▁▂▁▁▁▁▁▁▂▁▂▂▁▂▁▁▁▂▁▁▂▁▁▂
Loss/KL_divergence,▃▃▁▅▇▆▆▅▆▄▆█▆█▆▆▆▃▄▄▆▅▅▅▇▆▇▅▇▆▅▆▄▃▆▇▅▆▅▅
Loss/Policy_loss,▆█▄▄▂▃▃▃▃▃▃▃▂▃▃▁▃▂▄▃▂▂▃▃▃▄▂▂▂▂▃▂▃▁▁▂▂▃▂▂
Loss/Regularized_Actor_loss,▆█▄▄▂▃▃▃▃▃▃▃▂▃▃▁▃▂▄▃▂▂▃▃▃▄▂▂▂▂▃▂▃▁▁▂▂▃▂▂

0,1
Duration/Mean_train_ep_duration,41.2
Duration/Mean_val_ep_duration,37.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,14.67965
Loss/Critic_loss,14.83795
Loss/Entropy_bonus,1.7141
Loss/KL_divergence,0.00079
Loss/Policy_loss,14.75297
Loss/Regularized_Actor_loss,15.5955


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0l8eaz23 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0002420052755556511
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.00020101834771530535
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.03786513383268597
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.8763155602967676
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [350, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 2.6913833897612094e-

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'tanh', 'actor_lr': 0.0002420052755556511, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.00020101834771530535, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.03786513383268597, 'epochs': 10, 'exponential_factor': 0.8763155602967676, 'gamma': 0.9, 'hidden_sizes': [350, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 2.6913833897612094e-06, 'l2_factor': 8.920771445586116e-05, 'lrelu': 0.001, 'minibatch_size': 256, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [350, 150], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.001, 'bn': False, 'momentum': 0.9, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 0.0002420052755556511, 'ad

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁█▂▄▆▆▆▅▇▂▆▅▆▅▄▆▃▅▆▂▆▄▂▃▃▃▄▅▄▆▆▄▅▄▃▃▄▅▃▄
Duration/Mean_val_ep_duration,▁▇▆▃▅▅▅▆▂▅▇▅▇▅▆▅▆▆▆▅█▂▇▇▆▅▅▄▇▇▅█▆▆▅▅▆▇▇▅
Learning_rate/Actor,█▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▆▆▇▇▇▇▇▇█▇▇▇█▇▇█▇▇█▇██▇█▇▇▇█▇██▇▇█▇▇▇▇▇
Loss/Critic_loss,█▁▄▄▂▂▁▄▂▇▂▃▃▃▂▂▅▂▃▆▂▃▆▃▅▃▄▅▄▂▂▃▃▂▆▃▂▃▅▂
Loss/Entropy_bonus,█▅▅▂▃▁▂▃▄▃▁▁▂▂▂▁▂▃▂▂▂▂▃▃▃▃▂▃▃▂▂▂▁▃▂▃▂▁▂▂
Loss/KL_divergence,█▃▄▄▃▂▁▃▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▁▂▂▂▁▂▃▂▂▂▂▁▂▂▂
Loss/Policy_loss,▁▆▆▇▇▇▇▇▇█▇▇▇█▇▇█▇▇█▇██▇█▇▇▇█▇▇█▇▇█▇▇▇▇▇
Loss/Regularized_Actor_loss,▁▆▆▇▇▇▇▇▇█▇▇▇█▇▇█▇▇█▇██▇█▇▇▇█▇██▇▇█▇▇▇▇▇

0,1
Duration/Mean_train_ep_duration,42.0
Duration/Mean_val_ep_duration,46.4
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.05991
Loss/Critic_loss,8.05311
Loss/Entropy_bonus,1.56565
Loss/KL_divergence,-0.00125
Loss/Policy_loss,-0.00062
Loss/Regularized_Actor_loss,-0.01063


[34m[1mwandb[0m: Agent Starting Run: x1bhoakz with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 256
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.00035885936435946795
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0011369456031649192
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0459592267784181
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9849727586449928
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	hidden_sizes: [150, 150, 150, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 1.068037321520035e-05
[34m[1mwandb[0m: 	l2_factor: 2.8266716305906706e-06
[34m[1mwandb

Config del trial
{'GAE_lambda': 0.95, 'T': 256, 'activation': 'lrelu', 'actor_lr': 0.00035885936435946795, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0011369456031649192, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0459592267784181, 'epochs': 10, 'exponential_factor': 0.9849727586449928, 'gamma': 0.95, 'hidden_sizes': [150, 150, 150, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 1.068037321520035e-05, 'l2_factor': 2.8266716305906706e-06, 'lrelu': 0.01, 'minibatch_size': 128, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [150, 150, 150, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.01, 'bn': True, 'momentum': 0.95, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 256, 'actor_lr': 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▂▃▃▄▄▅▅▄▆▅▄▅█▅▄▅▆▇▄▆▆▆▃▄▃▆▅▇▇█▁▆▆▃▅▃▇▇
Duration/Mean_val_ep_duration,▁▂▃▄▄▄▄▃▄▄▅▇▆█▅▆▆▇█▇▇▇▇▅▆▅▆▆▆▅▇█▇▇▆▄▅▅▅▇
Learning_rate/Actor,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Learning_rate/Critic,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▃▃▄▄▄▄▆▅▄▅▅▄▅▅▅▄▆▆▅▆▅▆▆▆▆▅▆▅▅▅▅▆▆▆▅▄▇▆█
Loss/Critic_loss,█▄▄▂▂▂▂▃▃▂▂▂▃▂▂▁▁▁▁▃▂▃▂▂▂▁▂▂▂▂▁▂▁▃▃▂▂▂▂▁
Loss/Entropy_bonus,██▆▅▄▄▄▃▃▃▃▃▃▃▃▃▃▂▃▃▃▁▂▂▂▂▂▂▂▂▂▁▂▁▃▂▁▁▂▂
Loss/KL_divergence,▄▄▅▄▂▃▄▁▆▁▃▃▃▃▅▃▅▂▄▃▁▂▄▇▄▄▅▂▃▃▁▂▃▂█▇▄▂▆▅
Loss/Policy_loss,▁▃▃▄▄▄▄▅▅▃▄▄▃▄▄▄▃▅▆▄▅▅▅▆▆▅▄▆▄▄▄▄▅▅▆▅▃▆▅█
Loss/Regularized_Actor_loss,▁▃▃▄▄▄▄▆▅▄▅▅▄▅▅▅▄▆▆▄▆▅▆▆▆▆▅▆▅▅▅▅▆▅▆▅▄▇▆█

0,1
Duration/Mean_train_ep_duration,132.0
Duration/Mean_val_ep_duration,128.8
Learning_rate/Actor,4e-05
Learning_rate/Critic,0.00013
Loss/Actor_loss,-0.01159
Loss/Critic_loss,2.37493
Loss/Entropy_bonus,1.09953
Loss/KL_divergence,0.00365
Loss/Policy_loss,0.03895
Loss/Regularized_Actor_loss,0.065


[34m[1mwandb[0m: Agent Starting Run: 1izhgv97 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.0001005445142864693
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0021163384057797567
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.0011815322211500177
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.91625657352732
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 250, 150, 350]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 3.299869328037109e-05
[34m[1mwandb[0m: 	l2_factor: 1.913925119536716e-06
[34m[1mwandb[

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.0001005445142864693, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0021163384057797567, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.0011815322211500177, 'epochs': 10, 'exponential_factor': 0.91625657352732, 'gamma': 0.9, 'hidden_sizes': [250, 250, 150, 350], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 3.299869328037109e-05, 'l2_factor': 1.913925119536716e-06, 'lrelu': 0.01, 'minibatch_size': 64, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.01, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 250, 150, 350], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'tanh', 'lrelu': 0.01, 'bn': True, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.000

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▃▃▃▅▆▅▆▅▆▅▆▆▅▆▆▅█▆▆▅▆▅▆▆▅▆▆▆▅▆▅▆▇▅▆▆▆▆
Duration/Mean_val_ep_duration,▁▃▂▄▆▅▆▆▇▇▆▇▇▇█▇▆█▇▇▇▇▇▇▇▇▇▇▇▆▇▇██▇▇▇▇▇▆
Learning_rate/Actor,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▂▁▂▃▃▃▄▃▃▄▇▄▄▃▆▆▆▄▄▃▅▃▃▄▄▇▄▂█▅▄▄▅▄▄▄▅▆▃
Loss/Critic_loss,█▄▄▃▃▃▄▂▂▄▁▂▁▁▂▂▁▁▁▁▁▂▁▂▂▁▂▁▁▁▂▂▁▁▂▁▁▂▁▁
Loss/Entropy_bonus,█▇▇▆▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▁▂▁▂▂▂▁▂▂▂▁▂▁▂▁▁▁
Loss/KL_divergence,▅▆▆▅▆▆▄▅▄▆▆▄▃▅▄▃▇▂▇▇▅▆▄▇▂▆▄▆▇▃▁▄▂▃▆█▃▆▄▂
Loss/Policy_loss,▂▂▁▂▃▃▃▄▃▃▃▇▄▄▃▆▆▆▄▄▃▅▃▃▄▄▇▄▂█▅▄▄▅▄▄▄▅▆▃
Loss/Regularized_Actor_loss,▃▃▁▂▃▃▂▄▃▂▃▆▃▄▂▆▆▆▃▃▃▅▂▃▃▃▇▃▁█▄▃▄▄▄▄▄▄▆▃

0,1
Duration/Mean_train_ep_duration,159.0
Duration/Mean_val_ep_duration,120.3
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.00644
Loss/Critic_loss,1.91978
Loss/Entropy_bonus,0.89422
Loss/KL_divergence,0.02474
Loss/Policy_loss,-0.00538
Loss/Regularized_Actor_loss,0.30271


[34m[1mwandb[0m: Agent Starting Run: l017mh79 with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 512
[34m[1mwandb[0m: 	activation: lrelu
[34m[1mwandb[0m: 	actor_lr: 0.005238121836147737
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.004446965773165525
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.1
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.02211632785631272
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9842149713003936
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 250]
[34m[1mwandb[0m: 	initialization: orthogonal
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 7.726751825109328e-05
[34m[1mwandb[0m: 	l2_factor: 3.7788446711619257e-06
[34m[1mwandb[0m: 	

Config del trial
{'GAE_lambda': 0.95, 'T': 512, 'activation': 'lrelu', 'actor_lr': 0.005238121836147737, 'adv_std': True, 'bn': False, 'clipping_epsilon': 0.2, 'critic_lr': 0.004446965773165525, 'decay_method': 'exponential', 'dropout_prob': 0.1, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.02211632785631272, 'epochs': 10, 'exponential_factor': 0.9842149713003936, 'gamma': 0.9, 'hidden_sizes': [250, 350, 250], 'initialization': 'orthogonal', 'input_size': 10, 'l1_factor': 7.726751825109328e-05, 'l2_factor': 3.7788446711619257e-06, 'lrelu': 0.1, 'minibatch_size': 32, 'momentum': 0.9, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 250], 'output_size': 6, 'dropout_prob': 0.1, 'activation': 'lrelu', 'lrelu': 0.1, 'bn': False, 'momentum': 0.9, 'initialization': 'orthogonal', 'GAE_lambda': 0.95, 'T': 512, 'actor_lr': 0.00523812183614

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▂▃▃▅█▄▅▃▅▃▃▃▃▅▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▆▆▄▁▁▁▁▁
Duration/Mean_val_ep_duration,▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁████▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Actor,██▇▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
Learning_rate/Critic,██▇▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
Loss/Actor_loss,▄▂▁▄▁▂▂▁▂▂▂▁▄▂▂▂▂▂▁▂▂▂▂█▁▄▂▂▂▂▂▂▄▅▃▂▂▂▂▂
Loss/Critic_loss,█▁▁▅▂▆▂▁▁▁▁▂▁▂▃▃▁▁▂▁▁▁▁▆▁▁▁▂▁▂▁▁▅▆▄▁▁▂▂▃
Loss/Entropy_bonus,▁▁▅▁▁▁▁▃▁▁▁▄▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▂▁▁▁▁▁▁
Loss/KL_divergence,█▅▃▅▅▇▅▄▅▅▅▃▅▅▅▅▅▁▅▅▅▅▅█▅▅▅▅▅▅▅▅▆▆▆▅▅▅▅▅
Loss/Policy_loss,▄▂▁▃▁▂▂▁▂▂▂▁▄▂▂▂▂▂▁▂▂▂▂█▁▄▂▂▂▂▂▂▄▅▃▂▂▂▂▂
Loss/Regularized_Actor_loss,█▂▁▆▂▃▂▁▂▂▂▁▄▂▂▂▂▁▂▂▂▂▂▇▁▃▂▂▂▂▂▂▄▆▅▃▂▂▂▂

0,1
Duration/Mean_train_ep_duration,1.0
Duration/Mean_val_ep_duration,1.0
Learning_rate/Actor,0.00186
Learning_rate/Critic,0.00158
Loss/Actor_loss,-0.0
Loss/Critic_loss,7.74717
Loss/Entropy_bonus,0.0
Loss/KL_divergence,0.0
Loss/Policy_loss,-0.0
Loss/Regularized_Actor_loss,0.02492


[34m[1mwandb[0m: Agent Starting Run: fp8tmb8c with config:
[34m[1mwandb[0m: 	GAE_lambda: 0.95
[34m[1mwandb[0m: 	T: 1024
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	actor_lr: 0.00934487810343775
[34m[1mwandb[0m: 	adv_std: True
[34m[1mwandb[0m: 	bn: True
[34m[1mwandb[0m: 	clipping_epsilon: 0.2
[34m[1mwandb[0m: 	critic_lr: 0.0030054045005490308
[34m[1mwandb[0m: 	decay_method: exponential
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	early_stopping_delta: 0
[34m[1mwandb[0m: 	early_stopping_patience: 30
[34m[1mwandb[0m: 	entropy: 0.024138576274802023
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	exponential_factor: 0.9037897758585156
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	hidden_sizes: [250, 350, 150, 150]
[34m[1mwandb[0m: 	initialization: uniform
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	l1_factor: 7.15488721828977e-06
[34m[1mwandb[0m: 	l2_factor: 7.886315722037178e-06
[34m[1mwandb[0m: 

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777779226502, max=1.0…

Config del trial
{'GAE_lambda': 0.95, 'T': 1024, 'activation': 'tanh', 'actor_lr': 0.00934487810343775, 'adv_std': True, 'bn': True, 'clipping_epsilon': 0.2, 'critic_lr': 0.0030054045005490308, 'decay_method': 'exponential', 'dropout_prob': 0.3, 'early_stopping_delta': 0, 'early_stopping_patience': 30, 'entropy': 0.024138576274802023, 'epochs': 10, 'exponential_factor': 0.9037897758585156, 'gamma': 0.99, 'hidden_sizes': [250, 350, 150, 150], 'initialization': 'uniform', 'input_size': 10, 'l1_factor': 7.15488721828977e-06, 'l2_factor': 7.886315722037178e-06, 'lrelu': 0.1, 'minibatch_size': 64, 'momentum': 0.95, 'output_size': 6, 'target_kl': 0.02, 'updates': 200, 'updates_per_val': 1, 'val_episodes': 10, 'value_loss_factor': 1}

Atributos actor
{'input_size': 10, 'hidden_sizes': [250, 350, 150, 150], 'output_size': 6, 'dropout_prob': 0.3, 'activation': 'tanh', 'lrelu': 0.1, 'bn': True, 'momentum': 0.95, 'initialization': 'uniform', 'GAE_lambda': 0.95, 'T': 1024, 'actor_lr': 0.0093448781

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▃▅▂▄▄▃▃▃▅▄▆██▇█▇▆█▆▇▇▇▆▇▅▇▆▆▆▆▇▅▇▇▅▅▇▅█
Duration/Mean_val_ep_duration,▁▆▄▁▂▂▁▂▃▄▄▅▇▆▇█▇▆▅█▄█▅▆▆▆▅▆▅▅▄▅▅▅▅▆▄▅▆▅
Learning_rate/Actor,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▂▅▂▃▄▂█▄▁▅▁▄▂▂▆▃▃▃▄▃▃▃▃▄▃▄▂▃▄▃▃▆▄▅▄▃▃▄▃▄
Loss/Critic_loss,█▃▂▂▂▁▂▂▁▂▂▁▁▂▁▁▁▂▁▂▁▂▁▁▂▂▁▂▂▁▁▂▂▁▁▂▂▂▁▂
Loss/Entropy_bonus,█▆▃▄▂▃▂▁▂▄▄▃▄▃▂▂▂▃▄▂▃▃▃▄▃▃▃▃▄▃▃▄▄▄▄▃▃▃▃▃
Loss/KL_divergence,▆█▇▄▆▃▃▁▁▂▃▂▂▁▆▇▂▇▃▃▆▆▄▆▃▄▂▂▆▅▁▆▃▅▃▃▆▃▆▅
Loss/Policy_loss,▃▅▂▃▄▂█▄▁▅▁▄▂▂▆▃▃▃▄▃▃▃▃▄▃▄▂▃▄▃▃▇▄▅▄▃▃▄▃▄
Loss/Regularized_Actor_loss,▄▇▄▄▄▃█▄▁▅▁▄▁▂▅▃▃▃▃▃▂▂▃▄▃▃▂▃▃▂▃▆▃▄▃▃▂▄▃▃

0,1
Duration/Mean_train_ep_duration,153.16667
Duration/Mean_val_ep_duration,186.3
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,-0.04758
Loss/Critic_loss,7.9739
Loss/Entropy_bonus,1.21701
Loss/KL_divergence,-0.01401
Loss/Policy_loss,-0.0182
Loss/Regularized_Actor_loss,-0.01998
