<a href="https://colab.research.google.com/github/hchaparov/Dynamic_Pricing_MARL/blob/main/Bachelor_thesis_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Environment

**Stationary Demand market:**



In [2]:
!pip install gymnasium
!pip install numpy
!pip install torch
!pip install stable_baselines3 #"stable-baselines3[extra]>=2.0.0a4"
!pip install huggingface_sb3

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manyli

In [20]:
from pickle import TRUE
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

class OligopolyMarketEnv(gym.Env):

   # Because of google colab, we cannot implement the GUI ('human' render mode)
    metadata = {"render_modes": ["console"]}

    def __init__(self, a, b, beta_G, beta_L, reference_price, c, a_phi, before, max_steps, render_mode="console"):
        super(OligopolyMarketEnv, self).__init__()
        self.render_mode = render_mode
        self.reference_price = reference_price
        self.a = a
        self.b = b
        self.beta_G = beta_G
        self.beta_L = beta_L
        self.c = c  # costs (Lower boundary for prices)
        self.a_phi = a_phi
        self.before = before
        self.max_steps = max_steps # change of the price |max_steps| per season
        self.last_action = None
        self.last_profit = None
        self.uv_buffer = []
        self.revenue_buffer = []
        self.t = 0
        self.upper_bound = None
        action = 0
        self.count = max_steps



         # Determine the upper bound of the action space
        if self.reference_price <= self.a / self.b:
            self.upper_bound = min((self.a + self.beta_G * self.reference_price) / (self.b + self.beta_G), (self.a - self.b * self.reference_price + self.beta_G * self.reference_price)/(self.beta_G))
        else:
            self.upper_bound = min((self.a + self.beta_L * self.reference_price) / (self.b + self.beta_L), (self.a - self.b * self.reference_price + self.beta_L * self.reference_price)/(self.beta_L))

        # Ensure the upper bound is at least greater than the lower bound c
        if self.upper_bound < self.c:
           print("Watch out: upper_bound < costs")

        # Action space (price set by the firm) is bounded
        # self.action_space = spaces.Box(low=np.array([c], dtype=np.float32), high=np.array([upper_bound], dtype=np.float32), dtype=np.float32)
        # Normalize action [-1, 1]
        self.action_space = spaces.Box(low=np.array([-1], dtype=np.float32), high=np.array([1], dtype=np.float32), dtype=np.float32)

        # State space is the reference price
        self.observation_space = spaces.Box(low=np.array([0], dtype=np.float32), high=np.array([np.inf], dtype=np.float32), shape=(1,), dtype=np.float32)

    def step(self, action):
        # rescale the action space [c, upper_bound]
        # action = action
        rescaled_action = self.c + ((action + 1.01) / 2 )* (self.upper_bound - self.c)
        price = rescaled_action
        done = False
        # average_price = price  # Simplified for single agent
        # b_phi = 1


        # Implementing demand function
        if self.reference_price > price:
            demand = self.a - self.b * self.reference_price + self.beta_G * (self.reference_price - price)
        else:
            demand = self.a - self.b * self.reference_price + self.beta_L * (self.reference_price - price)

        demand = np.array(demand).item()
        demand = int(np.floor(demand))

        # Ensuring float type inside the environment
        # Converting from array to scalar if needed
        if isinstance(price, np.ndarray):
          price = price.item()

        if isinstance(demand, np.ndarray):
          demand = demand.item()

        revenue = price * demand
        self.revenue_buffer.append(revenue)
        self.uv_buffer.append(np.random.randint(demand + 1, 1000))

        # immediate reward function
        if self.t == 0 or self.before >= self.t:
           reward = self.revenue_buffer[self.t] / self.uv_buffer[self.t]
        else:
           reward = (self.revenue_buffer[self.t] / self.uv_buffer[self.t]) - (self.revenue_buffer[self.t - self.before] / self.uv_buffer[self.t - self.before])


        # Store the last action and last profit for render()
        self.last_action = rescaled_action
        self.last_profit = revenue

        next_state = np.array([self.reference_price]).astype(np.float32)  # State is constant
        self.t += 1

        if self.t == self.max_steps:
          done = True
          self.max_steps += self.count


        return next_state, reward, done, False, {} #, action, price, demand, self.t, rescaled_action, self.revenue_buffer, self.uv_buffer


    def reset(self, seed=None, options=None):
        super().reset(seed=seed, options=options)
       # we convert to float32 to make it more general, because we want to use continuous actions
        return np.array([self.reference_price]).astype(np.float32), {}


    def render(self):
      if self.render_mode == "console":
          if self.last_action is not None and self.last_profit is not None:
              # Ensure last_action and last_profit are scalars for formatting
              if isinstance(self.last_action, np.ndarray):
                  if self.last_action.size == 1:
                      last_action = self.last_action.item()  # Convert single element array to scalar
                  else:
                      last_action = self.last_action
                      print("Error: last_action is not a single element array")
              else:
                  last_action = self.last_action  # if already scalar

              if isinstance(self.last_profit, np.ndarray):
                  if self.last_profit.size == 1:
                      last_profit = self.last_profit.item()
                  else:
                      last_profit = self.last_profit
                      print("Error: last_profit is not a single element array")
              else:
                  last_profit = self.last_profit

              print(f"Reference Price: {self.reference_price:.3f}")
              print(f"Last Action (Price Set by Firm): {last_action:.3f}")
              print(f"Last Profit: {last_profit:.3f}")


    def close(self):
        pass



TEST OF THE NON-TRAINED ENV:

In [4]:
#Test for compatability with the algorithms of stable_baselines3
from stable_baselines3.common.env_checker import check_env
env = OligopolyMarketEnv(a = 10, b = 1, beta_G = 2, beta_L = 2, reference_price = 1.0, c = 0, a_phi = 1, before = 2, max_steps = 3)
# If the environment doesn't follow the interface, an error will be thrown
check_env(env, warn=True)

In [12]:
import random
env = OligopolyMarketEnv(a = 10, b = 1, beta_G = 2, beta_L = 2, reference_price = 1.0, c = 0, a_phi = 1, before = 2, max_steps = 3)

obs, _ = env.reset()
env.render()

print(env.observation_space)
print(env.action_space)
print(env.action_space.sample())


# Test:
n_steps = 20
for step in range(n_steps):
    price = random.uniform(-1, 1)
    print(f"Step {step + 1}")
    obs, reward, done, false, info, action, price, demand, t, rescaled_action, revenue_buffer, uv_buffer = env.step(price)
    print( "reward=", reward, "action=", action, "done =", done, ",price=", price, ",demand=", demand,",t = ", t, ",rescaled_action=", rescaled_action, ",revenue_buffer=", revenue_buffer, ",uv_buffer=", uv_buffer)
    env.render()

Box(0.0, inf, (1,), float32)
Box(-1.0, 1.0, (1,), float32)
[-0.50006974]
Step 1
reward= 0.0008989268667747776 action= -0.9683347397249891 done = False ,price= 0.08333052055002188 ,demand= 10 ,t =  1 ,rescaled_action= 0.08333052055002188 ,revenue_buffer= [0.8333052055002188] ,uv_buffer= [927]
Reference Price: 1.000
Last Action (Price Set by Firm): 0.083
Last Profit: 0.833
Step 2
reward= 0.06958028688963223 action= 0.7208096363796019 done = False ,price= 3.461619272759204 ,demand= 4 ,t =  2 ,rescaled_action= 3.461619272759204 ,revenue_buffer= [0.8333052055002188, 13.846477091036816] ,uv_buffer= [927, 199]
Reference Price: 1.000
Last Action (Price Set by Firm): 3.462
Last Profit: 13.846
Step 3
reward= 0.015302518936511543 action= 0.5145134490499625 done = True ,price= 3.049026898099925 ,demand= 4 ,t =  3 ,rescaled_action= 3.049026898099925 ,revenue_buffer= [0.8333052055002188, 13.846477091036816, 12.1961075923997] ,uv_buffer= [927, 199, 797]
Reference Price: 1.000
Last Action (Price Set b

Vectorize the environment:

In [5]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor

vec_env = make_vec_env(OligopolyMarketEnv, n_envs=10, env_kwargs=dict(a = 10, b = 1, beta_G = 2, beta_L = 2, reference_price = 1.0, c = 0, a_phi = 1, before = 2, max_steps = 3))

# **PPO model**

In [28]:
model = PPO(
    policy = 'MlpPolicy',
    env = vec_env,
    seed = 0,
    n_steps = 1970,
    batch_size = 128,
    n_epochs = 5,
    gamma = 0.9601882016991302,
    gae_lambda = 0.889617767476432,
    ent_coef = 0.027907021786249108,
    verbose=1).learn(10000000)

model_name = "ppo-Oligopoly"
model.save(model_name)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    loss                 | 0.0108       |
|    n_updates            | 1400         |
|    policy_gradient_loss | -0.000151    |
|    std                  | 0.354        |
|    value_loss           | 0.0788       |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3            |
|    ep_rew_mean          | 0.000569     |
| time/                   |              |
|    fps                  | 3007         |
|    iterations           | 282          |
|    time_elapsed         | 1847         |
|    total_timesteps      | 5555400      |
| train/                  |              |
|    approx_kl            | 0.0002661744 |
|    clip_fraction        | 0.00141      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.387       |
|    explained_variance   | 0            |
|    learning_rate        | 0.00

# Automatic Hyperparameter Tuning:

In [7]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [26]:
import optuna

def objective(trial):

    # Hyperparameter boundaries
    n_steps = trial.suggest_int('n_steps', 256, 2048)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    n_epochs = trial.suggest_int('n_epochs', 3, 10)
    gamma = trial.suggest_uniform('gamma', 0.9, 0.9999)
    gae_lambda = trial.suggest_uniform('gae_lambda', 0.8, 0.99)
    ent_coef = trial.suggest_loguniform('ent_coef', 0.01, 0.1)

    # Setup the model
    model = PPO(
        policy='MlpPolicy',
        env=vec_env,
        seed=0,
        n_steps=n_steps,
        batch_size=batch_size,
        n_epochs=n_epochs,
        gamma=gamma,
        gae_lambda=gae_lambda,
        ent_coef=ent_coef,
        verbose=1
    )

    # Train the model
    model.learn(total_timesteps=20000)

    # Evaluate the model
    obs = vec_env.reset()
    total_rewards = 0
    done = np.array([False])
    while not done.all():
        action, _ = model.predict(obs, deterministic=False)
        obs, rewards, done, _info = vec_env.step(action)
        total_rewards += rewards.sum()

    # Save the model
    model_name = f"ppo-Oligopoly-{trial.number}"
    model.save(model_name)

    return total_rewards



In [27]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# Best hyperparameters
print("Best trial:", study.best_trial.params)


[I 2024-05-01 09:31:35,283] A new study created in memory with name: no-name-2f7838cd-d144-40fe-87ce-ef0053a0a940
  gamma = trial.suggest_uniform('gamma', 0.9, 0.9999)
  gae_lambda = trial.suggest_uniform('gae_lambda', 0.8, 0.99)
  ent_coef = trial.suggest_loguniform('ent_coef', 0.01, 0.1)
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2019 and n_envs=10)


Using cpu device
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 3         |
|    ep_rew_mean     | -0.000787 |
| time/              |           |
|    fps             | 6506      |
|    iterations      | 1         |
|    time_elapsed    | 3         |
|    total_timesteps | 20190     |
----------------------------------


[I 2024-05-01 09:31:44,282] Trial 0 finished with value: 0.6998485624790192 and parameters: {'n_steps': 2019, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9825714383907151, 'gae_lambda': 0.9425304423608996, 'ent_coef': 0.01054622780534942}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1974 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000178 |
| time/              |          |
|    fps             | 7737     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19740    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3            |
|    ep_rew_mean          | 0.00197      |
| time/                   |              |
|    fps                  | 2450         |
|    iterations           | 2            |
|    time_elapsed         | 16           |
|    total_timesteps      | 39480        |
| train/                  |              |
|    approx_kl            | 0.0074380543 |
|    clip_fraction        | 0.103        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0            

[I 2024-05-01 09:32:11,077] Trial 1 finished with value: 0.04398512840270996 and parameters: {'n_steps': 1974, 'batch_size': 32, 'n_epochs': 4, 'gamma': 0.997299503664903, 'gae_lambda': 0.8221147114798086, 'ent_coef': 0.0805656874655687}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1481 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00251  |
| time/              |          |
|    fps             | 5867     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 14810    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00772    |
| time/                   |             |
|    fps                  | 3704        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 29620       |
| train/                  |             |
|    approx_kl            | 0.007787656 |
|    clip_fraction        | 0.0944      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:32:22,883] Trial 2 finished with value: -0.11024989560246468 and parameters: {'n_steps': 1481, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9461149682193465, 'gae_lambda': 0.979415924958467, 'ent_coef': 0.05594765999502983}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1227 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0034  |
| time/              |          |
|    fps             | 7723     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 12270    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00117     |
| time/                   |             |
|    fps                  | 3069        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 24540       |
| train/                  |             |
|    approx_kl            | 0.007988461 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:32:36,377] Trial 3 finished with value: 0.38901442289352417 and parameters: {'n_steps': 1227, 'batch_size': 32, 'n_epochs': 3, 'gamma': 0.9592996121116376, 'gae_lambda': 0.850414634249354, 'ent_coef': 0.0693043172351606}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1898 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00312 |
| time/              |          |
|    fps             | 7877     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18980    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000694    |
| time/                   |             |
|    fps                  | 5104        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 37960       |
| train/                  |             |
|    approx_kl            | 0.010628385 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:32:47,284] Trial 4 finished with value: -0.10483400151133537 and parameters: {'n_steps': 1898, 'batch_size': 128, 'n_epochs': 4, 'gamma': 0.9811073992045782, 'gae_lambda': 0.8371388517191884, 'ent_coef': 0.03541489013672162}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=425 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000879 |
| time/              |          |
|    fps             | 7833     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 4250     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00387    |
| time/                   |             |
|    fps                  | 3469        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 8500        |
| train/                  |             |
|    approx_kl            | 0.008295469 |
|    clip_fraction        | 0.0739      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:32:57,015] Trial 5 finished with value: 0.604412280023098 and parameters: {'n_steps': 425, 'batch_size': 128, 'n_epochs': 9, 'gamma': 0.9336849001578923, 'gae_lambda': 0.897998746921739, 'ent_coef': 0.09819774022721718}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1883 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0107  |
| time/              |          |
|    fps             | 5992     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 18830    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00489    |
| time/                   |             |
|    fps                  | 1323        |
|    iterations           | 2           |
|    time_elapsed         | 28          |
|    total_timesteps      | 37660       |
| train/                  |             |
|    approx_kl            | 0.010009685 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:33:47,783] Trial 6 finished with value: 0.500705186277628 and parameters: {'n_steps': 1883, 'batch_size': 32, 'n_epochs': 9, 'gamma': 0.9708389871303754, 'gae_lambda': 0.9323344452931777, 'ent_coef': 0.04114299021061437}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=746 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00388  |
| time/              |          |
|    fps             | 5460     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 7460     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0091      |
| time/                   |             |
|    fps                  | 3584        |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 14920       |
| train/                  |             |
|    approx_kl            | 0.011856651 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:33:56,786] Trial 7 finished with value: 0.24570409953594208 and parameters: {'n_steps': 746, 'batch_size': 64, 'n_epochs': 4, 'gamma': 0.9677079898409799, 'gae_lambda': 0.8138878441836312, 'ent_coef': 0.011173577291197721}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1126 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00171 |
| time/              |          |
|    fps             | 7857     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 11260    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00105     |
| time/                   |             |
|    fps                  | 1193        |
|    iterations           | 2           |
|    time_elapsed         | 18          |
|    total_timesteps      | 22520       |
| train/                  |             |
|    approx_kl            | 0.011694845 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:34:30,563] Trial 8 finished with value: -0.4013635516166687 and parameters: {'n_steps': 1126, 'batch_size': 32, 'n_epochs': 10, 'gamma': 0.9967781201291647, 'gae_lambda': 0.8812200623357204, 'ent_coef': 0.025310096114528276}. Best is trial 0 with value: 0.6998485624790192.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1236 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00577 |
| time/              |          |
|    fps             | 7710     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 12360    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00138     |
| time/                   |             |
|    fps                  | 1417        |
|    iterations           | 2           |
|    time_elapsed         | 17          |
|    total_timesteps      | 24720       |
| train/                  |             |
|    approx_kl            | 0.013675092 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:35:03,609] Trial 9 finished with value: 1.345749020576477 and parameters: {'n_steps': 1236, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.9588304232757636, 'gae_lambda': 0.8447823142315539, 'ent_coef': 0.02490924021146513}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=863 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00921  |
| time/              |          |
|    fps             | 6920     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 8630     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00211    |
| time/                   |             |
|    fps                  | 2313        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 17260       |
| train/                  |             |
|    approx_kl            | 0.014493497 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:35:21,290] Trial 10 finished with value: -0.5523189455270767 and parameters: {'n_steps': 863, 'batch_size': 64, 'n_epochs': 7, 'gamma': 0.9010283484206303, 'gae_lambda': 0.8724419143505101, 'ent_coef': 0.018725386938053645}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1500 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00107 |
| time/              |          |
|    fps             | 7618     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 15000    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0044     |
| time/                   |             |
|    fps                  | 3576        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 30000       |
| train/                  |             |
|    approx_kl            | 0.017403364 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:35:33,318] Trial 11 finished with value: 0.09421105682849884 and parameters: {'n_steps': 1500, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.938384985949731, 'gae_lambda': 0.9382321022764646, 'ent_coef': 0.010274356044818744}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1549 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00284  |
| time/              |          |
|    fps             | 7745     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 15490    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00274     |
| time/                   |             |
|    fps                  | 1426        |
|    iterations           | 2           |
|    time_elapsed         | 21          |
|    total_timesteps      | 30980       |
| train/                  |             |
|    approx_kl            | 0.014829265 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:36:11,882] Trial 12 finished with value: 0.9562256932258606 and parameters: {'n_steps': 1549, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.980552998115298, 'gae_lambda': 0.9262505809527626, 'ent_coef': 0.017167183847634895}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1486 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00543  |
| time/              |          |
|    fps             | 7038     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 14860    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00204    |
| time/                   |             |
|    fps                  | 1426        |
|    iterations           | 2           |
|    time_elapsed         | 20          |
|    total_timesteps      | 29720       |
| train/                  |             |
|    approx_kl            | 0.016680405 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:36:48,309] Trial 13 finished with value: -0.002464352175593376 and parameters: {'n_steps': 1486, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.9218978662948988, 'gae_lambda': 0.9099486609679288, 'ent_coef': 0.018892150042329584}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1214 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0034  |
| time/              |          |
|    fps             | 7611     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 12140    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0019      |
| time/                   |             |
|    fps                  | 1797        |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 24280       |
| train/                  |             |
|    approx_kl            | 0.013923121 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:37:12,007] Trial 14 finished with value: -0.3244209233671427 and parameters: {'n_steps': 1214, 'batch_size': 32, 'n_epochs': 6, 'gamma': 0.9581388037755793, 'gae_lambda': 0.9726064844268502, 'ent_coef': 0.017655000461842422}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1653 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00336  |
| time/              |          |
|    fps             | 7507     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16530    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00123     |
| time/                   |             |
|    fps                  | 1298        |
|    iterations           | 2           |
|    time_elapsed         | 25          |
|    total_timesteps      | 33060       |
| train/                  |             |
|    approx_kl            | 0.013364432 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:37:58,500] Trial 15 finished with value: -0.25467100739479065 and parameters: {'n_steps': 1653, 'batch_size': 32, 'n_epochs': 9, 'gamma': 0.981253999748795, 'gae_lambda': 0.8590664556531519, 'ent_coef': 0.025441918846357502}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=967 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00244  |
| time/              |          |
|    fps             | 6227     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 9670     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00853    |
| time/                   |             |
|    fps                  | 2184        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 19340       |
| train/                  |             |
|    approx_kl            | 0.014457914 |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:38:19,670] Trial 16 finished with value: 0.3099287748336792 and parameters: {'n_steps': 967, 'batch_size': 64, 'n_epochs': 8, 'gamma': 0.952904480398219, 'gae_lambda': 0.9215062280380064, 'ent_coef': 0.014946205922940159}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1679 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.0104   |
| time/              |          |
|    fps             | 7567     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16790    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00786     |
| time/                   |             |
|    fps                  | 1185        |
|    iterations           | 2           |
|    time_elapsed         | 28          |
|    total_timesteps      | 33580       |
| train/                  |             |
|    approx_kl            | 0.012538686 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:39:10,107] Trial 17 finished with value: -0.20697899162769318 and parameters: {'n_steps': 1679, 'batch_size': 32, 'n_epochs': 10, 'gamma': 0.9677791576007178, 'gae_lambda': 0.8026538659066518, 'ent_coef': 0.02601192738131897}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=573 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00129 |
| time/              |          |
|    fps             | 7721     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 5730     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00242    |
| time/                   |             |
|    fps                  | 1365        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 11460       |
| train/                  |             |
|    approx_kl            | 0.008347929 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:39:39,469] Trial 18 finished with value: -0.4341927580535412 and parameters: {'n_steps': 573, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.9239572200862564, 'gae_lambda': 0.9557395842541335, 'ent_coef': 0.05033831915908289}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1342 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00463  |
| time/              |          |
|    fps             | 6534     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 13420    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000689    |
| time/                   |             |
|    fps                  | 1958        |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 26840       |
| train/                  |             |
|    approx_kl            | 0.015482605 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:40:01,910] Trial 19 finished with value: -0.4059280753135681 and parameters: {'n_steps': 1342, 'batch_size': 32, 'n_epochs': 5, 'gamma': 0.9859565923516519, 'gae_lambda': 0.8857844653039759, 'ent_coef': 0.014054259065451796}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1027 and n_envs=10)


Using cpu device
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 3         |
|    ep_rew_mean     | -0.000914 |
| time/              |           |
|    fps             | 6008      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 10270     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0132     |
| time/                   |             |
|    fps                  | 2139        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 20540       |
| train/                  |             |
|    approx_kl            | 0.012342894 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|  

[I 2024-05-01 09:40:18,201] Trial 20 finished with value: 0.03382033482193947 and parameters: {'n_steps': 1027, 'batch_size': 64, 'n_epochs': 8, 'gamma': 0.9733380565547335, 'gae_lambda': 0.8355022118551135, 'ent_coef': 0.031368981820738745}. Best is trial 9 with value: 1.345749020576477.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1710 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00312 |
| time/              |          |
|    fps             | 6082     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17100    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00108    |
| time/                   |             |
|    fps                  | 2923        |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 34200       |
| train/                  |             |
|    approx_kl            | 0.012909903 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:40:34,570] Trial 21 finished with value: 2.459814190864563 and parameters: {'n_steps': 1710, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9893362519035632, 'gae_lambda': 0.9561271691588732, 'ent_coef': 0.013263736090905665}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1752 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.014    |
| time/              |          |
|    fps             | 7538     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17520    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | 0.00161    |
| time/                   |            |
|    fps                  | 3815       |
|    iterations           | 2          |
|    time_elapsed         | 9          |
|    total_timesteps      | 35040      |
| train/                  |            |
|    approx_kl            | 0.01093932 |
|    clip_fraction        | 0.11       |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.39      |
|    explained_variance   | 0          |
|    learning_rate        | 

[I 2024-05-01 09:40:47,594] Trial 22 finished with value: 0.44908180832862854 and parameters: {'n_steps': 1752, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9920705085130574, 'gae_lambda': 0.9555573902758493, 'ent_coef': 0.013462013841731563}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1349 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0181  |
| time/              |          |
|    fps             | 7701     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 13490    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00431    |
| time/                   |             |
|    fps                  | 3722        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 26980       |
| train/                  |             |
|    approx_kl            | 0.011944333 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:40:58,782] Trial 23 finished with value: -1.1111469119787216 and parameters: {'n_steps': 1349, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9890040761665817, 'gae_lambda': 0.9896093219597211, 'ent_coef': 0.017191110528861766}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1619 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00227 |
| time/              |          |
|    fps             | 5468     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16190    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00167     |
| time/                   |             |
|    fps                  | 1252        |
|    iterations           | 2           |
|    time_elapsed         | 25          |
|    total_timesteps      | 32380       |
| train/                  |             |
|    approx_kl            | 0.013740251 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:41:44,303] Trial 24 finished with value: -0.2846396937966347 and parameters: {'n_steps': 1619, 'batch_size': 32, 'n_epochs': 9, 'gamma': 0.9755928846257431, 'gae_lambda': 0.9075412072433383, 'ent_coef': 0.021759629549319495}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1787 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00531  |
| time/              |          |
|    fps             | 6268     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17870    |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3         |
|    ep_rew_mean          | -0.0299   |
| time/                   |           |
|    fps                  | 2999      |
|    iterations           | 2         |
|    time_elapsed         | 11        |
|    total_timesteps      | 35740     |
| train/                  |           |
|    approx_kl            | 0.0120761 |
|    clip_fraction        | 0.118     |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.39     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0003    |
|  

[I 2024-05-01 09:42:02,695] Trial 25 finished with value: -0.5068612024188042 and parameters: {'n_steps': 1787, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9621361453237081, 'gae_lambda': 0.9570139853681644, 'ent_coef': 0.021793488863510808}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1541 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00359  |
| time/              |          |
|    fps             | 7654     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 15410    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00817    |
| time/                   |             |
|    fps                  | 1957        |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 30820       |
| train/                  |             |
|    approx_kl            | 0.013284541 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:42:29,014] Trial 26 finished with value: -1.427952527999878 and parameters: {'n_steps': 1541, 'batch_size': 32, 'n_epochs': 5, 'gamma': 0.9997627266866185, 'gae_lambda': 0.9224997312508437, 'ent_coef': 0.013265167094139305}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1349 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0181  |
| time/              |          |
|    fps             | 7741     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 13490    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0151      |
| time/                   |             |
|    fps                  | 1690        |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 26980       |
| train/                  |             |
|    approx_kl            | 0.012303246 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:42:57,572] Trial 27 finished with value: -0.11777541041374207 and parameters: {'n_steps': 1349, 'batch_size': 32, 'n_epochs': 7, 'gamma': 0.9770931416912693, 'gae_lambda': 0.869676166584531, 'ent_coef': 0.030519332155460967}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=267 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00204 |
| time/              |          |
|    fps             | 7374     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2670     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.000976   |
| time/                   |             |
|    fps                  | 2365        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 5340        |
| train/                  |             |
|    approx_kl            | 0.010600836 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:43:14,712] Trial 28 finished with value: 0.29834095388650894 and parameters: {'n_steps': 267, 'batch_size': 64, 'n_epochs': 9, 'gamma': 0.9473946411383259, 'gae_lambda': 0.9651346509213751, 'ent_coef': 0.01544511164689982}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1345 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0107  |
| time/              |          |
|    fps             | 6630     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 13450    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000912    |
| time/                   |             |
|    fps                  | 3747        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 26900       |
| train/                  |             |
|    approx_kl            | 0.011188813 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:43:25,589] Trial 29 finished with value: 0.13602936267852783 and parameters: {'n_steps': 1345, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9897575248294848, 'gae_lambda': 0.9441183871265325, 'ent_coef': 0.011651719485229363}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1994 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00504  |
| time/              |          |
|    fps             | 5960     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 19940    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00821    |
| time/                   |             |
|    fps                  | 3525        |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 39880       |
| train/                  |             |
|    approx_kl            | 0.013205199 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 5.96e-08    |
|    learning

[I 2024-05-01 09:43:43,274] Trial 30 finished with value: 1.3859038352966309 and parameters: {'n_steps': 1994, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9641852995262986, 'gae_lambda': 0.9169231322590438, 'ent_coef': 0.023014848562279568}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2009 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0103  |
| time/              |          |
|    fps             | 7698     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 20090    |
---------------------------------


[I 2024-05-01 09:43:51,634] Trial 31 finished with value: -0.06822901219129562 and parameters: {'n_steps': 2009, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9628959229308003, 'gae_lambda': 0.9268056252253586, 'ent_coef': 0.02169362057008034}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1827 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00347 |
| time/              |          |
|    fps             | 5015     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 18270    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00144     |
| time/                   |             |
|    fps                  | 3242        |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 36540       |
| train/                  |             |
|    approx_kl            | 0.010992014 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:44:08,784] Trial 32 finished with value: -0.030878184363245964 and parameters: {'n_steps': 1827, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9521439853018286, 'gae_lambda': 0.9034686865296593, 'ent_coef': 0.036762767907163235}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2027 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00223 |
| time/              |          |
|    fps             | 7737     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 20270    |
---------------------------------


[I 2024-05-01 09:44:16,341] Trial 33 finished with value: -0.1164296343922615 and parameters: {'n_steps': 2027, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9824129448214425, 'gae_lambda': 0.944420961983665, 'ent_coef': 0.027725279862840575}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1923 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00227 |
| time/              |          |
|    fps             | 5811     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 19230    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00119    |
| time/                   |             |
|    fps                  | 3951        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 38460       |
| train/                  |             |
|    approx_kl            | 0.012591024 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:44:30,641] Trial 34 finished with value: 0.5054288171231747 and parameters: {'n_steps': 1923, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9662253168108828, 'gae_lambda': 0.9137469349734351, 'ent_coef': 0.021589406308963676}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1748 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0189  |
| time/              |          |
|    fps             | 6610     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17480    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00245    |
| time/                   |             |
|    fps                  | 3214        |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 34960       |
| train/                  |             |
|    approx_kl            | 0.015757682 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:44:48,728] Trial 35 finished with value: 0.8080161660909653 and parameters: {'n_steps': 1748, 'batch_size': 128, 'n_epochs': 10, 'gamma': 0.956270050984891, 'gae_lambda': 0.8922598390285117, 'ent_coef': 0.012188783961835389}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1566 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00317 |
| time/              |          |
|    fps             | 7479     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 15660    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000141    |
| time/                   |             |
|    fps                  | 1315        |
|    iterations           | 2           |
|    time_elapsed         | 23          |
|    total_timesteps      | 31320       |
| train/                  |             |
|    approx_kl            | 0.018467836 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.36       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:45:31,057] Trial 36 finished with value: 0.5899232402443886 and parameters: {'n_steps': 1566, 'batch_size': 32, 'n_epochs': 9, 'gamma': 0.9416233899643959, 'gae_lambda': 0.8443175736452436, 'ent_coef': 0.016318475631011152}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1403 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00371  |
| time/              |          |
|    fps             | 6419     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 14030    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00614    |
| time/                   |             |
|    fps                  | 4079        |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 28060       |
| train/                  |             |
|    approx_kl            | 0.011781648 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:45:40,324] Trial 37 finished with value: 1.1476793140172958 and parameters: {'n_steps': 1403, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9756161966195173, 'gae_lambda': 0.9358436819880083, 'ent_coef': 0.01954355567906619}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1428 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0023  |
| time/              |          |
|    fps             | 7836     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 14280    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0188      |
| time/                   |             |
|    fps                  | 3550        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 28560       |
| train/                  |             |
|    approx_kl            | 0.011500842 |
|    clip_fraction        | 0.0988      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:45:50,969] Trial 38 finished with value: 0.14588411524891853 and parameters: {'n_steps': 1428, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9716463815919479, 'gae_lambda': 0.9888966153249017, 'ent_coef': 0.01987669958765447}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1260 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00332 |
| time/              |          |
|    fps             | 7634     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 12600    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00189    |
| time/                   |             |
|    fps                  | 5514        |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 25200       |
| train/                  |             |
|    approx_kl            | 0.007639188 |
|    clip_fraction        | 0.0821      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 5.96e-08    |
|    learning

[I 2024-05-01 09:45:56,899] Trial 39 finished with value: 0.7177366316318512 and parameters: {'n_steps': 1260, 'batch_size': 128, 'n_epochs': 3, 'gamma': 0.9943420352679797, 'gae_lambda': 0.9395605351177051, 'ent_coef': 0.04173661093783458}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1091 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00244 |
| time/              |          |
|    fps             | 6874     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 10910    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0127     |
| time/                   |             |
|    fps                  | 3855        |
|    iterations           | 2           |
|    time_elapsed         | 5           |
|    total_timesteps      | 21820       |
| train/                  |             |
|    approx_kl            | 0.010590443 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:46:04,473] Trial 40 finished with value: -0.11312747001647949 and parameters: {'n_steps': 1091, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9859148720823186, 'gae_lambda': 0.8288239832087017, 'ent_coef': 0.024215317739947187}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1885 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00288 |
| time/              |          |
|    fps             | 7829     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18850    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00128     |
| time/                   |             |
|    fps                  | 3932        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 37700       |
| train/                  |             |
|    approx_kl            | 0.012226997 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:46:18,014] Trial 41 finished with value: -0.07124520093202591 and parameters: {'n_steps': 1885, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9787121851859564, 'gae_lambda': 0.9329977436901075, 'ent_coef': 0.016462617793899976}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1439 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00245  |
| time/              |          |
|    fps             | 7544     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 14390    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0055     |
| time/                   |             |
|    fps                  | 3580        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 28780       |
| train/                  |             |
|    approx_kl            | 0.011151798 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 5.96e-08    |
|    learning

[I 2024-05-01 09:46:29,600] Trial 42 finished with value: -0.22937696427106857 and parameters: {'n_steps': 1439, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9637510746451856, 'gae_lambda': 0.9151814350300274, 'ent_coef': 0.02930283211580004}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1676 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00337 |
| time/              |          |
|    fps             | 7791     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16760    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0271     |
| time/                   |             |
|    fps                  | 2478        |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 33520       |
| train/                  |             |
|    approx_kl            | 0.012803021 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:46:53,668] Trial 43 finished with value: 0.7697563171386719 and parameters: {'n_steps': 1676, 'batch_size': 32, 'n_epochs': 4, 'gamma': 0.9724323152534293, 'gae_lambda': 0.9481177724089025, 'ent_coef': 0.019705401687148367}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1224 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.0117   |
| time/              |          |
|    fps             | 7409     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 12240    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00149    |
| time/                   |             |
|    fps                  | 3480        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 24480       |
| train/                  |             |
|    approx_kl            | 0.010611271 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:47:04,947] Trial 44 finished with value: -0.011886470019817352 and parameters: {'n_steps': 1224, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9857131481646155, 'gae_lambda': 0.8970485251667095, 'ent_coef': 0.034458868702424164}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1932 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.0104   |
| time/              |          |
|    fps             | 7093     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19320    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00083     |
| time/                   |             |
|    fps                  | 1825        |
|    iterations           | 2           |
|    time_elapsed         | 21          |
|    total_timesteps      | 38640       |
| train/                  |             |
|    approx_kl            | 0.012543147 |
|    clip_fraction        | 0.112       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:47:42,490] Trial 45 finished with value: 0.23440569639205933 and parameters: {'n_steps': 1932, 'batch_size': 32, 'n_epochs': 6, 'gamma': 0.9783058058525816, 'gae_lambda': 0.9328192485207081, 'ent_coef': 0.023837637982969298}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1547 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00663  |
| time/              |          |
|    fps             | 6520     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 15470    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0075     |
| time/                   |             |
|    fps                  | 2365        |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 30940       |
| train/                  |             |
|    approx_kl            | 0.006709581 |
|    clip_fraction        | 0.0952      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:48:03,988] Trial 46 finished with value: 0.5567908883094788 and parameters: {'n_steps': 1547, 'batch_size': 64, 'n_epochs': 8, 'gamma': 0.9571357968646771, 'gae_lambda': 0.9732082751630189, 'ent_coef': 0.07931812119486943}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=871 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00896 |
| time/              |          |
|    fps             | 7747     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 8710     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00133     |
| time/                   |             |
|    fps                  | 2901        |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 17420       |
| train/                  |             |
|    approx_kl            | 0.012289234 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:48:16,803] Trial 47 finished with value: 0.3618173152208328 and parameters: {'n_steps': 871, 'batch_size': 128, 'n_epochs': 9, 'gamma': 0.9689031765491583, 'gae_lambda': 0.9209375364925805, 'ent_coef': 0.012424969949829943}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1832 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00599 |
| time/              |          |
|    fps             | 6619     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18320    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00299     |
| time/                   |             |
|    fps                  | 2827        |
|    iterations           | 2           |
|    time_elapsed         | 12          |
|    total_timesteps      | 36640       |
| train/                  |             |
|    approx_kl            | 0.015130126 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:48:37,729] Trial 48 finished with value: -0.12514418363571167 and parameters: {'n_steps': 1832, 'batch_size': 32, 'n_epochs': 3, 'gamma': 0.9832446364405474, 'gae_lambda': 0.815876856475357, 'ent_coef': 0.010478689837720241}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1156 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00619  |
| time/              |          |
|    fps             | 7604     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 11560    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0197      |
| time/                   |             |
|    fps                  | 1410        |
|    iterations           | 2           |
|    time_elapsed         | 16          |
|    total_timesteps      | 23120       |
| train/                  |             |
|    approx_kl            | 0.013951143 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:49:06,004] Trial 49 finished with value: -0.09421353042125702 and parameters: {'n_steps': 1156, 'batch_size': 32, 'n_epochs': 7, 'gamma': 0.9942217519237082, 'gae_lambda': 0.8582907655866112, 'ent_coef': 0.01516924458015774}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1416 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00117  |
| time/              |          |
|    fps             | 7695     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 14160    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00131     |
| time/                   |             |
|    fps                  | 3196        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 28320       |
| train/                  |             |
|    approx_kl            | 0.014535238 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:49:20,176] Trial 50 finished with value: 0.900845929980278 and parameters: {'n_steps': 1416, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9337837881422829, 'gae_lambda': 0.9639859626617755, 'ent_coef': 0.018316514348196675}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1466 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00112  |
| time/              |          |
|    fps             | 6820     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 14660    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00955    |
| time/                   |             |
|    fps                  | 3292        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 29320       |
| train/                  |             |
|    approx_kl            | 0.013900415 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:49:33,317] Trial 51 finished with value: 1.2214641906321049 and parameters: {'n_steps': 1466, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9305928193858809, 'gae_lambda': 0.9615238417790627, 'ent_coef': 0.018432120228367925}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1614 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.012    |
| time/              |          |
|    fps             | 6095     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16140    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00552     |
| time/                   |             |
|    fps                  | 2915        |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 32280       |
| train/                  |             |
|    approx_kl            | 0.015546896 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:49:51,427] Trial 52 finished with value: -0.5482422485947609 and parameters: {'n_steps': 1614, 'batch_size': 128, 'n_epochs': 9, 'gamma': 0.9023700471718197, 'gae_lambda': 0.978630234087683, 'ent_coef': 0.020689977739582203}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1468 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00359  |
| time/              |          |
|    fps             | 7194     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 14680    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | -0.000326  |
| time/                   |            |
|    fps                  | 3642       |
|    iterations           | 2          |
|    time_elapsed         | 8          |
|    total_timesteps      | 29360      |
| train/                  |            |
|    approx_kl            | 0.01364221 |
|    clip_fraction        | 0.12       |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.39      |
|    explained_variance   | 0          |
|    learning_rate        | 

[I 2024-05-01 09:50:05,476] Trial 53 finished with value: 2.2459967136383057 and parameters: {'n_steps': 1468, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9174872428830874, 'gae_lambda': 0.960234092762371, 'ent_coef': 0.023744653696334432}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1717 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.0207   |
| time/              |          |
|    fps             | 6804     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17170    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.000427   |
| time/                   |             |
|    fps                  | 3201        |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 34340       |
| train/                  |             |
|    approx_kl            | 0.012887074 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:50:20,581] Trial 54 finished with value: -0.16440780460834503 and parameters: {'n_steps': 1717, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9152132893166267, 'gae_lambda': 0.9517870883051465, 'ent_coef': 0.027214733604795777}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1267 and n_envs=10)


Using cpu device
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 3         |
|    ep_rew_mean     | -0.000272 |
| time/              |           |
|    fps             | 7806      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 12670     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00229     |
| time/                   |             |
|    fps                  | 3146        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 25340       |
| train/                  |             |
|    approx_kl            | 0.011933047 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|  

[I 2024-05-01 09:50:32,429] Trial 55 finished with value: -0.11453749984502792 and parameters: {'n_steps': 1267, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9243490360191765, 'gae_lambda': 0.9691938363457343, 'ent_coef': 0.023812209545554425}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1455 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000931 |
| time/              |          |
|    fps             | 7308     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 14550    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000434    |
| time/                   |             |
|    fps                  | 3055        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 29100       |
| train/                  |             |
|    approx_kl            | 0.012047443 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:50:46,433] Trial 56 finished with value: -0.17513318732380867 and parameters: {'n_steps': 1455, 'batch_size': 128, 'n_epochs': 9, 'gamma': 0.9135134440643395, 'gae_lambda': 0.9640456365986338, 'ent_coef': 0.03383325821122077}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1166 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00217  |
| time/              |          |
|    fps             | 7735     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 11660    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0155     |
| time/                   |             |
|    fps                  | 3318        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 23320       |
| train/                  |             |
|    approx_kl            | 0.011650526 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:50:56,947] Trial 57 finished with value: 0.7372411042451859 and parameters: {'n_steps': 1166, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9297284216103708, 'gae_lambda': 0.980492994610174, 'ent_coef': 0.02340167536680025}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1286 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.006    |
| time/              |          |
|    fps             | 7531     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 12860    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3            |
|    ep_rew_mean          | 0.00528      |
| time/                   |              |
|    fps                  | 3693         |
|    iterations           | 2            |
|    time_elapsed         | 6            |
|    total_timesteps      | 25720        |
| train/                  |              |
|    approx_kl            | 0.0144473035 |
|    clip_fraction        | 0.124        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            

[I 2024-05-01 09:51:07,557] Trial 58 finished with value: -0.5741300135850906 and parameters: {'n_steps': 1286, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9092227821454826, 'gae_lambda': 0.9355665823667687, 'ent_coef': 0.01877113730694175}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1393 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00511  |
| time/              |          |
|    fps             | 7422     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 13930    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00518    |
| time/                   |             |
|    fps                  | 3793        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 27860       |
| train/                  |             |
|    approx_kl            | 0.014182105 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:51:19,543] Trial 59 finished with value: 0.21368944644927979 and parameters: {'n_steps': 1393, 'batch_size': 64, 'n_epochs': 4, 'gamma': 0.9168431828945328, 'gae_lambda': 0.957114356199098, 'ent_coef': 0.014153831269238302}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1025 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000724 |
| time/              |          |
|    fps             | 7732     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 10250    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00326     |
| time/                   |             |
|    fps                  | 3610        |
|    iterations           | 2           |
|    time_elapsed         | 5           |
|    total_timesteps      | 20500       |
| train/                  |             |
|    approx_kl            | 0.011129837 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:51:28,293] Trial 60 finished with value: -0.28652985394001007 and parameters: {'n_steps': 1025, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9441003748206619, 'gae_lambda': 0.9496800228926137, 'ent_coef': 0.0265956985285729}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1493 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000388 |
| time/              |          |
|    fps             | 5640     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 14930    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00633     |
| time/                   |             |
|    fps                  | 1448        |
|    iterations           | 2           |
|    time_elapsed         | 20          |
|    total_timesteps      | 29860       |
| train/                  |             |
|    approx_kl            | 0.015113534 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:52:04,842] Trial 61 finished with value: 0.639634981751442 and parameters: {'n_steps': 1493, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.9606491888347436, 'gae_lambda': 0.9405373735390311, 'ent_coef': 0.01714045037657255}. Best is trial 21 with value: 2.459814190864563.


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.012    |
| time/              |          |
|    fps             | 6289     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16140    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00689     |
| time/                   |             |
|    fps                  | 3583        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 32280       |
| train/                  |             |
|    approx_kl            | 0.014627366 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:52:17,851] Trial 62 finished with value: -0.7963514141738415 and parameters: {'n_steps': 1614, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9541621843257817, 'gae_lambda': 0.9262926784246509, 'ent_coef': 0.017810213960257188}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1522 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00217  |
| time/              |          |
|    fps             | 5848     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 15220    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.013       |
| time/                   |             |
|    fps                  | 1277        |
|    iterations           | 2           |
|    time_elapsed         | 23          |
|    total_timesteps      | 30440       |
| train/                  |             |
|    approx_kl            | 0.013097768 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:53:02,254] Trial 63 finished with value: 0.7540276050567627 and parameters: {'n_steps': 1522, 'batch_size': 32, 'n_epochs': 9, 'gamma': 0.9754133559004202, 'gae_lambda': 0.9045520467501241, 'ent_coef': 0.020149263574072675}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1316 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00614  |
| time/              |          |
|    fps             | 7436     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 13160    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00141    |
| time/                   |             |
|    fps                  | 3496        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 26320       |
| train/                  |             |
|    approx_kl            | 0.014065445 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:53:14,078] Trial 64 finished with value: 0.34336601197719574 and parameters: {'n_steps': 1316, 'batch_size': 128, 'n_epochs': 8, 'gamma': 0.9357385434223648, 'gae_lambda': 0.8777611885844347, 'ent_coef': 0.01614312128922391}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1591 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00234  |
| time/              |          |
|    fps             | 7474     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 15910    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000152    |
| time/                   |             |
|    fps                  | 2361        |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 31820       |
| train/                  |             |
|    approx_kl            | 0.014223712 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:53:37,131] Trial 65 finished with value: 0.04495678097009659 and parameters: {'n_steps': 1591, 'batch_size': 64, 'n_epochs': 8, 'gamma': 0.9480381878058446, 'gae_lambda': 0.9593395544134122, 'ent_coef': 0.02286874895778485}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1706 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.0169   |
| time/              |          |
|    fps             | 7762     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17060    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3            |
|    ep_rew_mean          | 0.00504      |
| time/                   |              |
|    fps                  | 1659         |
|    iterations           | 2            |
|    time_elapsed         | 20           |
|    total_timesteps      | 34120        |
| train/                  |              |
|    approx_kl            | 0.0125982575 |
|    clip_fraction        | 0.119        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            

[I 2024-05-01 09:54:14,890] Trial 66 finished with value: 2.301076889038086 and parameters: {'n_steps': 1706, 'batch_size': 32, 'n_epochs': 7, 'gamma': 0.9283593598139808, 'gae_lambda': 0.9164408298118542, 'ent_coef': 0.02906582246192688}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1812 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00205  |
| time/              |          |
|    fps             | 7304     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18120    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00996     |
| time/                   |             |
|    fps                  | 4285        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 36240       |
| train/                  |             |
|    approx_kl            | 0.012618386 |
|    clip_fraction        | 0.122       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:54:28,224] Trial 67 finished with value: -1.2633735686540604 and parameters: {'n_steps': 1812, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9281220986569748, 'gae_lambda': 0.8895806911888333, 'ent_coef': 0.028810200963557603}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1692 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00108  |
| time/              |          |
|    fps             | 7686     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16920    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.022       |
| time/                   |             |
|    fps                  | 1649        |
|    iterations           | 2           |
|    time_elapsed         | 20          |
|    total_timesteps      | 33840       |
| train/                  |             |
|    approx_kl            | 0.012196806 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | -1.19e-07   |
|    learning

[I 2024-05-01 09:55:06,081] Trial 68 finished with value: -2.765710487961769 and parameters: {'n_steps': 1692, 'batch_size': 32, 'n_epochs': 7, 'gamma': 0.919773961546277, 'gae_lambda': 0.9165654722269534, 'ent_coef': 0.039660214600199986}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1944 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00887 |
| time/              |          |
|    fps             | 7732     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19440    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | 0.0101     |
| time/                   |            |
|    fps                  | 3738       |
|    iterations           | 2          |
|    time_elapsed         | 10         |
|    total_timesteps      | 38880      |
| train/                  |            |
|    approx_kl            | 0.01195423 |
|    clip_fraction        | 0.124      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.39      |
|    explained_variance   | 0          |
|    learning_rate        | 

[I 2024-05-01 09:55:21,989] Trial 69 finished with value: -0.8541489541530609 and parameters: {'n_steps': 1944, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9291536953144376, 'gae_lambda': 0.9012457877214145, 'ent_coef': 0.03199044914464916}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1373 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00645 |
| time/              |          |
|    fps             | 7550     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 13730    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00624    |
| time/                   |             |
|    fps                  | 1714        |
|    iterations           | 2           |
|    time_elapsed         | 16          |
|    total_timesteps      | 27460       |
| train/                  |             |
|    approx_kl            | 0.013321014 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:55:49,442] Trial 70 finished with value: -0.6096344143152237 and parameters: {'n_steps': 1373, 'batch_size': 32, 'n_epochs': 6, 'gamma': 0.9653649039596252, 'gae_lambda': 0.9452389957292673, 'ent_coef': 0.025952063113798038}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1476 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00477  |
| time/              |          |
|    fps             | 7757     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 14760    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00458    |
| time/                   |             |
|    fps                  | 1451        |
|    iterations           | 2           |
|    time_elapsed         | 20          |
|    total_timesteps      | 29520       |
| train/                  |             |
|    approx_kl            | 0.014227344 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:56:27,076] Trial 71 finished with value: -0.09171869605779648 and parameters: {'n_steps': 1476, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.9397199325151662, 'gae_lambda': 0.9272342298621223, 'ent_coef': 0.022225054846909714}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1748 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0189  |
| time/              |          |
|    fps             | 7656     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17480    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00454     |
| time/                   |             |
|    fps                  | 1570        |
|    iterations           | 2           |
|    time_elapsed         | 22          |
|    total_timesteps      | 34960       |
| train/                  |             |
|    approx_kl            | 0.014224905 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:57:06,162] Trial 72 finished with value: -0.8061429262161255 and parameters: {'n_steps': 1748, 'batch_size': 32, 'n_epochs': 7, 'gamma': 0.9089092730184438, 'gae_lambda': 0.9082101948380974, 'ent_coef': 0.025269717786782307}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1630 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00432  |
| time/              |          |
|    fps             | 7619     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16300    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | -0.00475   |
| time/                   |            |
|    fps                  | 1322       |
|    iterations           | 2          |
|    time_elapsed         | 24         |
|    total_timesteps      | 32600      |
| train/                  |            |
|    approx_kl            | 0.01268742 |
|    clip_fraction        | 0.119      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.37      |
|    explained_variance   | 0          |
|    learning_rate        | 

[I 2024-05-01 09:57:51,551] Trial 73 finished with value: 0.5507103204727173 and parameters: {'n_steps': 1630, 'batch_size': 32, 'n_epochs': 9, 'gamma': 0.969269113583093, 'gae_lambda': 0.9304150786568127, 'ent_coef': 0.02069077733143889}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1859 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00317  |
| time/              |          |
|    fps             | 7730     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18590    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.000743   |
| time/                   |             |
|    fps                  | 1414        |
|    iterations           | 2           |
|    time_elapsed         | 26          |
|    total_timesteps      | 37180       |
| train/                  |             |
|    approx_kl            | 0.017448192 |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.35       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:58:39,850] Trial 74 finished with value: -0.0414355993270874 and parameters: {'n_steps': 1859, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.9212312532248347, 'gae_lambda': 0.8032807404715433, 'ent_coef': 0.014350097083596322}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1554 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00718  |
| time/              |          |
|    fps             | 7435     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 15540    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00334    |
| time/                   |             |
|    fps                  | 3967        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 31080       |
| train/                  |             |
|    approx_kl            | 0.013893219 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:58:52,416] Trial 75 finished with value: 1.506170996464789 and parameters: {'n_steps': 1554, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9808368124929076, 'gae_lambda': 0.9196293103536219, 'ent_coef': 0.013054118623200706}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1512 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00239 |
| time/              |          |
|    fps             | 7844     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 15120    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0114      |
| time/                   |             |
|    fps                  | 3923        |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 30240       |
| train/                  |             |
|    approx_kl            | 0.014293436 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:59:04,685] Trial 76 finished with value: -0.3686307668685913 and parameters: {'n_steps': 1512, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.988845292926095, 'gae_lambda': 0.9195875939787977, 'ent_coef': 0.012885023026499522}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1197 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00146  |
| time/              |          |
|    fps             | 7495     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 11970    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00934    |
| time/                   |             |
|    fps                  | 4292        |
|    iterations           | 2           |
|    time_elapsed         | 5           |
|    total_timesteps      | 23940       |
| train/                  |             |
|    approx_kl            | 0.011592531 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:59:13,131] Trial 77 finished with value: 0.4836811423301697 and parameters: {'n_steps': 1197, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9265377479389246, 'gae_lambda': 0.9525683912895958, 'ent_coef': 0.011678230365826155}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1976 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000178 |
| time/              |          |
|    fps             | 6767     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19760    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.0131     |
| time/                   |             |
|    fps                  | 4204        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 39520       |
| train/                  |             |
|    approx_kl            | 0.013635315 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:59:26,183] Trial 78 finished with value: 1.8430870175361633 and parameters: {'n_steps': 1976, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9599528352429846, 'gae_lambda': 0.9109187676140882, 'ent_coef': 0.010921461339372511}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1799 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0014  |
| time/              |          |
|    fps             | 6292     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17990    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.000842   |
| time/                   |             |
|    fps                  | 3565        |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 35980       |
| train/                  |             |
|    approx_kl            | 0.015430038 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 09:59:41,371] Trial 79 finished with value: 0.2707633748650551 and parameters: {'n_steps': 1799, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9495452501314855, 'gae_lambda': 0.910730709004832, 'ent_coef': 0.011134982204253144}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1998 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000147 |
| time/              |          |
|    fps             | 6472     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 19980    |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3         |
|    ep_rew_mean          | 0.00261   |
| time/                   |           |
|    fps                  | 4030      |
|    iterations           | 2         |
|    time_elapsed         | 9         |
|    total_timesteps      | 39960     |
| train/                  |           |
|    approx_kl            | 0.0157086 |
|    clip_fraction        | 0.132     |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.38     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0003    |
|  

[I 2024-05-01 09:59:56,651] Trial 80 finished with value: 0.8462444245815277 and parameters: {'n_steps': 1998, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9449590529330069, 'gae_lambda': 0.8677102573687979, 'ent_coef': 0.010056672303932536}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1899 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00511 |
| time/              |          |
|    fps             | 6108     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 18990    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3            |
|    ep_rew_mean          | 0.00107      |
| time/                   |              |
|    fps                  | 3877         |
|    iterations           | 2            |
|    time_elapsed         | 9            |
|    total_timesteps      | 37980        |
| train/                  |              |
|    approx_kl            | 0.0133591425 |
|    clip_fraction        | 0.116        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            

[I 2024-05-01 10:00:10,133] Trial 81 finished with value: 0.058134619146585464 and parameters: {'n_steps': 1899, 'batch_size': 128, 'n_epochs': 4, 'gamma': 0.9557845169322023, 'gae_lambda': 0.9135214921741333, 'ent_coef': 0.010904417233524338}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2044 and n_envs=10)


Using cpu device
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 3         |
|    ep_rew_mean     | -0.000456 |
| time/              |           |
|    fps             | 7136      |
|    iterations      | 1         |
|    time_elapsed    | 2         |
|    total_timesteps | 20440     |
----------------------------------


[I 2024-05-01 10:00:16,717] Trial 82 finished with value: 2.3270537704229355 and parameters: {'n_steps': 2044, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9596771618466632, 'gae_lambda': 0.8991847880022509, 'ent_coef': 0.02841645833921791}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2033 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00293 |
| time/              |          |
|    fps             | 6246     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 20330    |
---------------------------------


[I 2024-05-01 10:00:23,928] Trial 83 finished with value: 0.13160185515880585 and parameters: {'n_steps': 2033, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9590247601747259, 'gae_lambda': 0.8950848920704557, 'ent_coef': 0.029754711219300477}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1972 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00074 |
| time/              |          |
|    fps             | 7493     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19720    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0052      |
| time/                   |             |
|    fps                  | 4101        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 39440       |
| train/                  |             |
|    approx_kl            | 0.011859481 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:00:37,423] Trial 84 finished with value: 0.8661879897117615 and parameters: {'n_steps': 1972, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9515556512872283, 'gae_lambda': 0.8843250598469975, 'ent_coef': 0.03160783460390892}. Best is trial 21 with value: 2.459814190864563.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1970 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.01    |
| time/              |          |
|    fps             | 7195     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19700    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.000724   |
| time/                   |             |
|    fps                  | 4094        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 39400       |
| train/                  |             |
|    approx_kl            | 0.011874574 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:00:50,680] Trial 85 finished with value: 3.2475749254226685 and parameters: {'n_steps': 1970, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9601882016991302, 'gae_lambda': 0.889617767476432, 'ent_coef': 0.027907021786249108}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1961 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00501  |
| time/              |          |
|    fps             | 7374     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19610    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000471    |
| time/                   |             |
|    fps                  | 4034        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 39220       |
| train/                  |             |
|    approx_kl            | 0.011087871 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:01:04,077] Trial 86 finished with value: 1.4147919416427612 and parameters: {'n_steps': 1961, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9603777374333679, 'gae_lambda': 0.9021291282532172, 'ent_coef': 0.037447067510577345}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1982 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00624  |
| time/              |          |
|    fps             | 7155     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19820    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.00984     |
| time/                   |             |
|    fps                  | 4078        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 39640       |
| train/                  |             |
|    approx_kl            | 0.009429108 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:01:17,821] Trial 87 finished with value: -1.515755146741867 and parameters: {'n_steps': 1982, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9651842402056572, 'gae_lambda': 0.8993354274146529, 'ent_coef': 0.04464154134848037}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2037 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00297  |
| time/              |          |
|    fps             | 7781     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 20370    |
---------------------------------


[I 2024-05-01 10:01:24,194] Trial 88 finished with value: -0.4880690351128578 and parameters: {'n_steps': 2037, 'batch_size': 128, 'n_epochs': 4, 'gamma': 0.9602452799524611, 'gae_lambda': 0.874713411541761, 'ent_coef': 0.056726039679331264}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1869 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000879 |
| time/              |          |
|    fps             | 7097     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18690    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | -0.00629   |
| time/                   |            |
|    fps                  | 4073       |
|    iterations           | 2          |
|    time_elapsed         | 9          |
|    total_timesteps      | 37380      |
| train/                  |            |
|    approx_kl            | 0.01087752 |
|    clip_fraction        | 0.115      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.4       |
|    explained_variance   | 0          |
|    learning_rate        | 

[I 2024-05-01 10:01:37,741] Trial 89 finished with value: -0.06201982870697975 and parameters: {'n_steps': 1869, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9617925401636181, 'gae_lambda': 0.8910622499818014, 'ent_coef': 0.03943700810826056}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1931 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0006  |
| time/              |          |
|    fps             | 7829     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19310    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0101      |
| time/                   |             |
|    fps                  | 4389        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 38620       |
| train/                  |             |
|    approx_kl            | 0.010416817 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:01:50,988] Trial 90 finished with value: 0.5673095732927322 and parameters: {'n_steps': 1931, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9578368638940713, 'gae_lambda': 0.9060230628161217, 'ent_coef': 0.03364601209902771}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1986 and n_envs=10)


Using cpu device
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 3         |
|    ep_rew_mean     | -0.000116 |
| time/              |           |
|    fps             | 7773      |
|    iterations      | 1         |
|    time_elapsed    | 2         |
|    total_timesteps | 19860     |
----------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | 0.00902    |
| time/                   |            |
|    fps                  | 4547       |
|    iterations           | 2          |
|    time_elapsed         | 8          |
|    total_timesteps      | 39720      |
| train/                  |            |
|    approx_kl            | 0.01208118 |
|    clip_fraction        | 0.118      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.39      |
|    explained_variance   | 0          |
|    learning_rate

[I 2024-05-01 10:02:04,056] Trial 91 finished with value: -0.8230403661727905 and parameters: {'n_steps': 1986, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9637356446222396, 'gae_lambda': 0.9180343235810181, 'ent_coef': 0.028154591390377917}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1778 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00211  |
| time/              |          |
|    fps             | 7811     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 17780    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000235    |
| time/                   |             |
|    fps                  | 3855        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 35560       |
| train/                  |             |
|    approx_kl            | 0.010001636 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:02:17,902] Trial 92 finished with value: 0.13733215257525444 and parameters: {'n_steps': 1778, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9545587083712401, 'gae_lambda': 0.8511243326851923, 'ent_coef': 0.04690385606285762}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1865 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00494  |
| time/              |          |
|    fps             | 7688     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18650    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.0137      |
| time/                   |             |
|    fps                  | 3123        |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 37300       |
| train/                  |             |
|    approx_kl            | 0.011581162 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:02:36,282] Trial 93 finished with value: -1.5884620547294617 and parameters: {'n_steps': 1865, 'batch_size': 64, 'n_epochs': 5, 'gamma': 0.9672048667588616, 'gae_lambda': 0.9019929678857949, 'ent_coef': 0.03622721990878384}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1971 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.00074 |
| time/              |          |
|    fps             | 5997     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 19710    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | 0.000513    |
| time/                   |             |
|    fps                  | 3659        |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 39420       |
| train/                  |             |
|    approx_kl            | 0.012739986 |
|    clip_fraction        | 0.122       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:02:52,323] Trial 94 finished with value: -0.08125653676688671 and parameters: {'n_steps': 1971, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.9703640005323851, 'gae_lambda': 0.9099857765036472, 'ent_coef': 0.02467680765211355}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1916 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00174  |
| time/              |          |
|    fps             | 7331     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 19160    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3          |
|    ep_rew_mean          | 0.0217     |
| time/                   |            |
|    fps                  | 4806       |
|    iterations           | 2          |
|    time_elapsed         | 7          |
|    total_timesteps      | 38320      |
| train/                  |            |
|    approx_kl            | 0.00959249 |
|    clip_fraction        | 0.104      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.4       |
|    explained_variance   | -1.19e-07  |
|    learning_rate        | 

[I 2024-05-01 10:03:03,659] Trial 95 finished with value: -2.3010664582252502 and parameters: {'n_steps': 1916, 'batch_size': 128, 'n_epochs': 4, 'gamma': 0.9992621880765451, 'gae_lambda': 0.9229722375955962, 'ent_coef': 0.03793522310029152}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1698 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.00185  |
| time/              |          |
|    fps             | 6006     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 16980    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.000972   |
| time/                   |             |
|    fps                  | 4233        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 33960       |
| train/                  |             |
|    approx_kl            | 0.011249881 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:03:15,002] Trial 96 finished with value: -0.026055097579956055 and parameters: {'n_steps': 1698, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9517072568628157, 'gae_lambda': 0.885688320803236, 'ent_coef': 0.032168742325997916}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1727 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | 0.000926 |
| time/              |          |
|    fps             | 5688     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 17270    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00781    |
| time/                   |             |
|    fps                  | 3488        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 34540       |
| train/                  |             |
|    approx_kl            | 0.016442074 |
|    clip_fraction        | 0.13        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:03:29,886] Trial 97 finished with value: 0.44598897732794285 and parameters: {'n_steps': 1727, 'batch_size': 128, 'n_epochs': 7, 'gamma': 0.9590847607131429, 'gae_lambda': 0.8669702139471138, 'ent_coef': 0.012207662099441114}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1838 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0134  |
| time/              |          |
|    fps             | 6805     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 18380    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.00355    |
| time/                   |             |
|    fps                  | 4152        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 36760       |
| train/                  |             |
|    approx_kl            | 0.011536317 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning

[I 2024-05-01 10:03:41,907] Trial 98 finished with value: 0.27826379239559174 and parameters: {'n_steps': 1838, 'batch_size': 128, 'n_epochs': 4, 'gamma': 0.9738723796115645, 'gae_lambda': 0.8971925743108463, 'ent_coef': 0.030394394506007838}. Best is trial 85 with value: 3.2475749254226685.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=542 and n_envs=10)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -0.0039  |
| time/              |          |
|    fps             | 5074     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 5420     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3            |
|    ep_rew_mean          | -0.00528     |
| time/                   |              |
|    fps                  | 3384         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 10840        |
| train/                  |              |
|    approx_kl            | 0.0082104625 |
|    clip_fraction        | 0.0853       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0            

[I 2024-05-01 10:03:50,165] Trial 99 finished with value: 0.10180149227380753 and parameters: {'n_steps': 542, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.95427976992766, 'gae_lambda': 0.8301253401560476, 'ent_coef': 0.013515682479699599}. Best is trial 85 with value: 3.2475749254226685.


Best trial: {'n_steps': 1970, 'batch_size': 128, 'n_epochs': 5, 'gamma': 0.9601882016991302, 'gae_lambda': 0.889617767476432, 'ent_coef': 0.027907021786249108}


# Evaluate policy:

In [30]:
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10, deterministic=False)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=-0.13 +/- 0.27018839343976264


# Test the agent:

In [31]:
# Test the trained agent
# using the vec_env
obs = vec_env.reset()
n_steps = 20
for step in range(n_steps):
    action, _ = model.predict(obs, deterministic = False)
    print(f"Step {step + 1}")
    print("Action: ", action)
    obs, reward, done, info = vec_env.step(action)
    print("obs=", obs, "reward=", reward, "done=", done)
    env.render()
    if done.all == True:
      print("End of the year", "reward=", reward)


Step 1
Action:  [[0.37647554]
 [0.38586572]
 [0.39531833]
 [0.5670901 ]
 [0.6664914 ]
 [1.        ]
 [0.03694803]
 [0.54122907]
 [0.61086214]
 [0.4967507 ]]
obs= [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]] reward= [-0.01097453  0.02483624  0.27102295 -0.01427257 -0.02824794  0.01761931
  0.00595464 -0.00077885  0.06957282  0.01603241] done= [False False False False False False False False False False]
Reference Price: 1.000
Last Action (Price Set by Firm): 0.292
Last Profit: 2.916
Step 2
Action:  [[ 0.9534478 ]
 [ 1.        ]
 [ 0.65922356]
 [-0.34054   ]
 [ 0.84296197]
 [ 0.66949594]
 [ 0.42006978]
 [ 0.6019601 ]
 [-0.5994081 ]
 [ 1.        ]]
obs= [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]] reward= [-0.00535809  0.00842392 -0.26195234 -0.0133842  -0.00658159  0.22111201
  0.15131332 -0.00393229  0.04154103  0.00313133] done= [False False False False False False False False False False]
Reference Price: 1.000
Last Action (Price Set by Firm): 0.292
Las