In [2]:
import gymnasium as gym
import torch as t
import gymnasium as gym

from stable_baselines3 import DQN, PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement, CheckpointCallback


## Simple Reinforcement Learning

Simple test loop with lunar lander to test random action (without model)
and see if reward and action flow is working as expected

In [1]:
import gymnasium as gym

env = gym.make("LunarLander-v2", render_mode="human")

def get_action(env, model = None, obs = None):
    action = 0
    _states = None

    if model is None:
        action = env.action_space.sample()  # agent policy that uses the observation and info
        print(f"Action taken is: {action}")
    elif model is not None:
        action, _states = model.predict(obs, deterministic=True)
    else:
        action = 0
    return action, _states


def run_environment(env, model = None, verbose: bool = False):

    observation, info = env.reset()
    for _ in range(1000):

        action, _states = get_action(env=env, model=model, obs=observation)
        observation, reward, terminated, truncated, info = env.step(action)

        print(f"Current Reward: {reward}")

        if terminated or truncated:
            print("Game is Over!")
            observation, info = env.reset()

    env.close()

In [2]:
run_environment(env=env, model=None, verbose=True)

Action taken is: 3
Current Reward: 0.5336336294087698
Action taken is: 2
Current Reward: -1.3591815551159414
Action taken is: 0
Current Reward: -0.5169990319246267
Action taken is: 3
Current Reward: 0.5100320852958771
Action taken is: 0
Current Reward: -0.38620506834004686
Action taken is: 3
Current Reward: 0.5342914878027056
Action taken is: 3
Current Reward: 0.6771096892316859
Action taken is: 3
Current Reward: 1.1522431266751323
Action taken is: 1
Current Reward: -1.117113815819066
Action taken is: 3
Current Reward: 0.7438421490318501
Action taken is: 0
Current Reward: -0.07078228232546735
Action taken is: 0
Current Reward: -0.1119046402337176
Action taken is: 0
Current Reward: -0.15250740704021837
Action taken is: 2
Current Reward: -1.1690165987281944
Action taken is: 1
Current Reward: -1.33270106775464
Action taken is: 3
Current Reward: 0.9161701611658384
Action taken is: 2
Current Reward: 0.7766691928721456
Action taken is: 3
Current Reward: 0.8258352117125025
Action taken is: 2


### View sample observation/action space and observation space

In [3]:
env = gym.make("LunarLander-v2", render_mode="human")
obs = env.reset()
env.close()

In [4]:
obs

(array([-0.00712576,  1.4217155 , -0.7217721 ,  0.47977316,  0.00826371,
         0.16349222,  0.        ,  0.        ], dtype=float32),
 {})

In [5]:
env.action_space

Discrete(4)

In [6]:
env.observation_space

Box([-1.5       -1.5       -5.        -5.        -3.1415927 -5.
 -0.        -0.       ], [1.5       1.5       5.        5.        3.1415927 5.        1.
 1.       ], (8,), float32)

## Model training

In [7]:
import torch as t

In [8]:
t.cuda.is_available()

True

In [9]:
%tensorboard --logdir lunar_lander_logs/

UsageError: Line magic function `%tensorboard` not found.


In [10]:
import gymnasium as gym

from stable_baselines3 import DQN, PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement, CheckpointCallback



# Create environment
env = gym.make("LunarLander-v2", render_mode="rgb_array")


# Stop Training callback
stop_train_callback = StopTrainingOnNoModelImprovement(
    max_no_improvement_evals=3_000, 
    min_evals=500, 
    verbose=1
)
eval_callback = EvalCallback(
    env,
    best_model_save_path="./lunar_lander_logs/",
    log_path="./lunar_lander_logs/",
    eval_freq=1_000, 
    callback_after_eval=stop_train_callback, 
    verbose=1,
    deterministic=True, 
    render=False,

)


# Instantiate the agent
model = PPO(
    "MlpPolicy", 
    env, 
    verbose=1, 
    device="cuda", 
    tensorboard_log="./lunar_lander_logs/",
    ent_coef=0.01
)
# Train the agent and display a progress bar
model.learn(
    total_timesteps=int(300_000), 
    progress_bar=True,
    callback=eval_callback,
)
# Save the agent
model.save("dqn_lunar")
del model  # delete trained model to demonstrate loading


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./logs/PPO_9


---------------------------------
| eval/              |          |
|    mean_ep_length  | 64       |
|    mean_reward     | -382     |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 70.8     |
|    mean_reward     | -393     |
| time/              |          |
|    total_timesteps | 2000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.2     |
|    ep_rew_mean     | -216     |
| time/              |          |
|    fps             | 424      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 124         |
|    mean_reward          | -636        |
| time/                   |             |
|    total_timesteps      | 3000        |
| train/                  |             |
|    approx_kl            | 0.008072646 |
|    clip_fraction        | 0.0292      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -0.00111    |
|    learning_rate        | 0.0003      |
|    loss                 | 605         |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00744    |
|    value_loss           | 1.84e+03    |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 98.8     |
|    mean_reward     | -697     |
| time/              |          |
|    total_timesteps | 4000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 98.1     |
|    ep_rew_mean     | -190     |
| time/              |          |
|    fps             | 357      |
|    iterations      | 2        |
|    time_elapsed    | 11       |
|    total_timesteps | 4096     |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 225         |
|    mean_reward          | -1.16e+03   |
| time/                   |             |
|    total_timesteps      | 5000        |
| train/                  |             |
|    approx_kl            | 0.005721868 |
|    clip_fraction        | 0.0304      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -0.012      |
|    learning_rate        | 0.0003      |
|    loss                 | 430         |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00565    |
|    value_loss           | 808         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 223       |
|    mean_reward     | -1.31e+03 |
| time/              |           |
|    total_timesteps | 6000      |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | -185     |
| time/              |          |
|    fps             | 316      |
|    iterations      | 3        |
|    time_elapsed    | 19       |
|    total_timesteps | 6144     |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 175         |
|    mean_reward          | -1.2e+03    |
| time/                   |             |
|    total_timesteps      | 7000        |
| train/                  |             |
|    approx_kl            | 0.010361899 |
|    clip_fraction        | 0.0685      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.36       |
|    explained_variance   | -0.0199     |
|    learning_rate        | 0.0003      |
|    loss                 | 490         |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.00972    |
|    value_loss           | 902         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 212      |
|    mean_reward     | -878     |
| time/              |          |
|    total_timesteps | 8000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -165     |
| time/              |          |
|    fps             | 300      |
|    iterations      | 4        |
|    time_elapsed    | 27       |
|    total_timesteps | 8192     |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 140          |
|    mean_reward          | -728         |
| time/                   |              |
|    total_timesteps      | 9000         |
| train/                  |              |
|    approx_kl            | 0.0073655266 |
|    clip_fraction        | 0.0946       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.35        |
|    explained_variance   | -0.00166     |
|    learning_rate        | 0.0003       |
|    loss                 | 179          |
|    n_updates            | 40           |
|    policy_gradient_loss | -0.0103      |
|    value_loss           | 465          |
------------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 200       |
|    mean_reward     | -1.07e+03 |
| time/              |           |
|    total_timesteps | 10000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 105      |
|    ep_rew_mean     | -162     |
| time/              |          |
|    fps             | 299      |
|    iterations      | 5        |
|    time_elapsed    | 34       |
|    total_timesteps | 10240    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 339          |
|    mean_reward          | -2.11e+03    |
| time/                   |              |
|    total_timesteps      | 11000        |
| train/                  |              |
|    approx_kl            | 0.0065091914 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.33        |
|    explained_variance   | -0.00238     |
|    learning_rate        | 0.0003       |
|    loss                 | 186          |
|    n_updates            | 50           |
|    policy_gradient_loss | -0.00283     |
|    value_loss           | 461          |
------------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 428       |
|    mean_reward     | -2.69e+03 |
| time/              |           |
|    total_timesteps | 12000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 109      |
|    ep_rew_mean     | -152     |
| time/              |          |
|    fps             | 273      |
|    iterations      | 6        |
|    time_elapsed    | 44       |
|    total_timesteps | 12288    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 163         |
|    mean_reward          | -898        |
| time/                   |             |
|    total_timesteps      | 13000       |
| train/                  |             |
|    approx_kl            | 0.014267084 |
|    clip_fraction        | 0.0717      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.31       |
|    explained_variance   | -0.00519    |
|    learning_rate        | 0.0003      |
|    loss                 | 264         |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0061     |
|    value_loss           | 592         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 149      |
|    mean_reward     | -868     |
| time/              |          |
|    total_timesteps | 14000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 115      |
|    ep_rew_mean     | -143     |
| time/              |          |
|    fps             | 276      |
|    iterations      | 7        |
|    time_elapsed    | 51       |
|    total_timesteps | 14336    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 303         |
|    mean_reward          | -3.05e+03   |
| time/                   |             |
|    total_timesteps      | 15000       |
| train/                  |             |
|    approx_kl            | 0.007284734 |
|    clip_fraction        | 0.0793      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.32       |
|    explained_variance   | -0.00898    |
|    learning_rate        | 0.0003      |
|    loss                 | 159         |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.0103     |
|    value_loss           | 364         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 241       |
|    mean_reward     | -1.46e+03 |
| time/              |           |
|    total_timesteps | 16000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 120      |
|    ep_rew_mean     | -133     |
| time/              |          |
|    fps             | 271      |
|    iterations      | 8        |
|    time_elapsed    | 60       |
|    total_timesteps | 16384    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 217         |
|    mean_reward          | -821        |
| time/                   |             |
|    total_timesteps      | 17000       |
| train/                  |             |
|    approx_kl            | 0.010227654 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.27       |
|    explained_variance   | 2.38e-07    |
|    learning_rate        | 0.0003      |
|    loss                 | 141         |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.0115     |
|    value_loss           | 272         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 209      |
|    mean_reward     | -770     |
| time/              |          |
|    total_timesteps | 18000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 127      |
|    ep_rew_mean     | -127     |
| time/              |          |
|    fps             | 271      |
|    iterations      | 9        |
|    time_elapsed    | 67       |
|    total_timesteps | 18432    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 398         |
|    mean_reward          | -2.99e+03   |
| time/                   |             |
|    total_timesteps      | 19000       |
| train/                  |             |
|    approx_kl            | 0.008135893 |
|    clip_fraction        | 0.0483      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.26       |
|    explained_variance   | -0.000495   |
|    learning_rate        | 0.0003      |
|    loss                 | 352         |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.00631    |
|    value_loss           | 563         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 503       |
|    mean_reward     | -3.37e+03 |
| time/              |           |
|    total_timesteps | 20000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 134      |
|    ep_rew_mean     | -122     |
| time/              |          |
|    fps             | 257      |
|    iterations      | 10       |
|    time_elapsed    | 79       |
|    total_timesteps | 20480    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 401         |
|    mean_reward          | -3.06e+03   |
| time/                   |             |
|    total_timesteps      | 21000       |
| train/                  |             |
|    approx_kl            | 0.011398224 |
|    clip_fraction        | 0.069       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.19       |
|    explained_variance   | -1.42e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 121         |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00791    |
|    value_loss           | 250         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 428       |
|    mean_reward     | -3.55e+03 |
| time/              |           |
|    total_timesteps | 22000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 143      |
|    ep_rew_mean     | -122     |
| time/              |          |
|    fps             | 249      |
|    iterations      | 11       |
|    time_elapsed    | 90       |
|    total_timesteps | 22528    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 271         |
|    mean_reward          | -1.59e+03   |
| time/                   |             |
|    total_timesteps      | 23000       |
| train/                  |             |
|    approx_kl            | 0.006739198 |
|    clip_fraction        | 0.0444      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.16       |
|    explained_variance   | -0.000264   |
|    learning_rate        | 0.0003      |
|    loss                 | 273         |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00857    |
|    value_loss           | 323         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 282       |
|    mean_reward     | -1.62e+03 |
| time/              |           |
|    total_timesteps | 24000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 156      |
|    ep_rew_mean     | -126     |
| time/              |          |
|    fps             | 246      |
|    iterations      | 12       |
|    time_elapsed    | 99       |
|    total_timesteps | 24576    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 546         |
|    mean_reward          | -3.63e+03   |
| time/                   |             |
|    total_timesteps      | 25000       |
| train/                  |             |
|    approx_kl            | 0.004396942 |
|    clip_fraction        | 0.00977     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.17       |
|    explained_variance   | 0.000594    |
|    learning_rate        | 0.0003      |
|    loss                 | 164         |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00641    |
|    value_loss           | 419         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 573       |
|    mean_reward     | -4.15e+03 |
| time/              |           |
|    total_timesteps | 26000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 163      |
|    ep_rew_mean     | -116     |
| time/              |          |
|    fps             | 235      |
|    iterations      | 13       |
|    time_elapsed    | 113      |
|    total_timesteps | 26624    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 359          |
|    mean_reward          | -1.35e+03    |
| time/                   |              |
|    total_timesteps      | 27000        |
| train/                  |              |
|    approx_kl            | 0.0059246146 |
|    clip_fraction        | 0.00508      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.12        |
|    explained_variance   | -0.0173      |
|    learning_rate        | 0.0003       |
|    loss                 | 95.5         |
|    n_updates            | 130          |
|    policy_gradient_loss | -0.00537     |
|    value_loss           | 239          |
------------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 415       |
|    mean_reward     | -1.58e+03 |
| time/              |           |
|    total_timesteps | 28000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 178      |
|    ep_rew_mean     | -114     |
| time/              |          |
|    fps             | 230      |
|    iterations      | 14       |
|    time_elapsed    | 124      |
|    total_timesteps | 28672    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 613         |
|    mean_reward          | -1.36e+03   |
| time/                   |             |
|    total_timesteps      | 29000       |
| train/                  |             |
|    approx_kl            | 0.008692566 |
|    clip_fraction        | 0.0944      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.23       |
|    explained_variance   | -0.0914     |
|    learning_rate        | 0.0003      |
|    loss                 | 46.1        |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0107     |
|    value_loss           | 111         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 486       |
|    mean_reward     | -1.01e+03 |
| time/              |           |
|    total_timesteps | 30000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 193      |
|    ep_rew_mean     | -112     |
| time/              |          |
|    fps             | 221      |
|    iterations      | 15       |
|    time_elapsed    | 138      |
|    total_timesteps | 30720    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 668          |
|    mean_reward          | -1.12e+03    |
| time/                   |              |
|    total_timesteps      | 31000        |
| train/                  |              |
|    approx_kl            | 0.0013704642 |
|    clip_fraction        | 0.000684     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.13        |
|    explained_variance   | -0.0597      |
|    learning_rate        | 0.0003       |
|    loss                 | 143          |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.00339     |
|    value_loss           | 297          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 514      |
|    mean_reward     | -915     |
| time/              |          |
|    total_timesteps | 32000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 207      |
|    ep_rew_mean     | -108     |
| time/              |          |
|    fps             | 213      |
|    iterations      | 16       |
|    time_elapsed    | 153      |
|    total_timesteps | 32768    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 672         |
|    mean_reward          | -2.12e+03   |
| time/                   |             |
|    total_timesteps      | 33000       |
| train/                  |             |
|    approx_kl            | 0.008092773 |
|    clip_fraction        | 0.0261      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.11       |
|    explained_variance   | 0.0691      |
|    learning_rate        | 0.0003      |
|    loss                 | 89.8        |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.00351    |
|    value_loss           | 218         |
-----------------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 709       |
|    mean_reward     | -2.06e+03 |
| time/              |           |
|    total_timesteps | 34000     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 227      |
|    ep_rew_mean     | -111     |
| time/              |          |
|    fps             | 204      |
|    iterations      | 17       |
|    time_elapsed    | 170      |
|    total_timesteps | 34816    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 459          |
|    mean_reward          | -697         |
| time/                   |              |
|    total_timesteps      | 35000        |
| train/                  |              |
|    approx_kl            | 0.0093319025 |
|    clip_fraction        | 0.0411       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.15        |
|    explained_variance   | 0.0864       |
|    learning_rate        | 0.0003       |
|    loss                 | 49.8         |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.00408     |
|    value_loss           | 124          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 574      |
|    mean_reward     | -866     |
| time/              |          |
|    total_timesteps | 36000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 242      |
|    ep_rew_mean     | -107     |
| time/              |          |
|    fps             | 199      |
|    iterations      | 18       |
|    time_elapsed    | 184      |
|    total_timesteps | 36864    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 604          |
|    mean_reward          | -808         |
| time/                   |              |
|    total_timesteps      | 37000        |
| train/                  |              |
|    approx_kl            | 0.0037002591 |
|    clip_fraction        | 0.0176       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.16        |
|    explained_variance   | 0.355        |
|    learning_rate        | 0.0003       |
|    loss                 | 80.7         |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.0041      |
|    value_loss           | 135          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 451      |
|    mean_reward     | -604     |
| time/              |          |
|    total_timesteps | 38000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 255      |
|    ep_rew_mean     | -104     |
| time/              |          |
|    fps             | 195      |
|    iterations      | 19       |
|    time_elapsed    | 199      |
|    total_timesteps | 38912    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 490         |
|    mean_reward          | -405        |
| time/                   |             |
|    total_timesteps      | 39000       |
| train/                  |             |
|    approx_kl            | 0.004380333 |
|    clip_fraction        | 0.0168      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 0.295       |
|    learning_rate        | 0.0003      |
|    loss                 | 84.5        |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00388    |
|    value_loss           | 228         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 797      |
|    mean_reward     | -725     |
| time/              |          |
|    total_timesteps | 40000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 267      |
|    ep_rew_mean     | -104     |
| time/              |          |
|    fps             | 189      |
|    iterations      | 20       |
|    time_elapsed    | 215      |
|    total_timesteps | 40960    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 656          |
|    mean_reward          | -750         |
| time/                   |              |
|    total_timesteps      | 41000        |
| train/                  |              |
|    approx_kl            | 0.0047771856 |
|    clip_fraction        | 0.015        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.03        |
|    explained_variance   | 0.442        |
|    learning_rate        | 0.0003       |
|    loss                 | 135          |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00668     |
|    value_loss           | 201          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 609      |
|    mean_reward     | -699     |
| time/              |          |
|    total_timesteps | 42000    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 522      |
|    mean_reward     | -567     |
| time/              |          |
|    total_timesteps | 43000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 282      |
|    ep_rew_mean     | -102     |
| time/              |          |
|    fps             | 181      |
|    iterations      | 21       |
|    time_elapsed    | 236      |
|    total_timesteps | 43008    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 811          |
|    mean_reward          | -770         |
| time/                   |              |
|    total_timesteps      | 44000        |
| train/                  |              |
|    approx_kl            | 0.0063147657 |
|    clip_fraction        | 0.0288       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | 0.443        |
|    learning_rate        | 0.0003       |
|    loss                 | 57.3         |
|    n_updates            | 210          |
|    policy_gradient_loss | -0.00255     |
|    value_loss           | 138          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 876      |
|    mean_reward     | -870     |
| time/              |          |
|    total_timesteps | 45000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 298      |
|    ep_rew_mean     | -102     |
| time/              |          |
|    fps             | 174      |
|    iterations      | 22       |
|    time_elapsed    | 258      |
|    total_timesteps | 45056    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 759         |
|    mean_reward          | -771        |
| time/                   |             |
|    total_timesteps      | 46000       |
| train/                  |             |
|    approx_kl            | 0.008297423 |
|    clip_fraction        | 0.0565      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.12       |
|    explained_variance   | 0.668       |
|    learning_rate        | 0.0003      |
|    loss                 | 46.6        |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00772    |
|    value_loss           | 119         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 528      |
|    mean_reward     | -614     |
| time/              |          |
|    total_timesteps | 47000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 313      |
|    ep_rew_mean     | -97.8    |
| time/              |          |
|    fps             | 171      |
|    iterations      | 23       |
|    time_elapsed    | 275      |
|    total_timesteps | 47104    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 699          |
|    mean_reward          | -627         |
| time/                   |              |
|    total_timesteps      | 48000        |
| train/                  |              |
|    approx_kl            | 0.0068912134 |
|    clip_fraction        | 0.0345       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.982       |
|    explained_variance   | 0.488        |
|    learning_rate        | 0.0003       |
|    loss                 | 22.5         |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.00745     |
|    value_loss           | 120          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 993      |
|    mean_reward     | -877     |
| time/              |          |
|    total_timesteps | 49000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 329      |
|    ep_rew_mean     | -95      |
| time/              |          |
|    fps             | 166      |
|    iterations      | 24       |
|    time_elapsed    | 295      |
|    total_timesteps | 49152    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 362         |
|    mean_reward          | -317        |
| time/                   |             |
|    total_timesteps      | 50000       |
| train/                  |             |
|    approx_kl            | 0.007755442 |
|    clip_fraction        | 0.0764      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 0.733       |
|    learning_rate        | 0.0003      |
|    loss                 | 35          |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.00704    |
|    value_loss           | 86.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 387      |
|    mean_reward     | -339     |
| time/              |          |
|    total_timesteps | 51000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 349      |
|    ep_rew_mean     | -98.8    |
| time/              |          |
|    fps             | 166      |
|    iterations      | 25       |
|    time_elapsed    | 307      |
|    total_timesteps | 51200    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 965          |
|    mean_reward          | -455         |
| time/                   |              |
|    total_timesteps      | 52000        |
| train/                  |              |
|    approx_kl            | 0.0076985406 |
|    clip_fraction        | 0.106        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.1         |
|    explained_variance   | 0.608        |
|    learning_rate        | 0.0003       |
|    loss                 | 50.6         |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.00848     |
|    value_loss           | 71.8         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 881      |
|    mean_reward     | -395     |
| time/              |          |
|    total_timesteps | 53000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 368      |
|    ep_rew_mean     | -98.5    |
| time/              |          |
|    fps             | 160      |
|    iterations      | 26       |
|    time_elapsed    | 331      |
|    total_timesteps | 53248    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -625        |
| time/                   |             |
|    total_timesteps      | 54000       |
| train/                  |             |
|    approx_kl            | 0.009111289 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.03       |
|    explained_variance   | 0.838       |
|    learning_rate        | 0.0003      |
|    loss                 | 27.9        |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.00955    |
|    value_loss           | 43.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -643     |
| time/              |          |
|    total_timesteps | 55000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 385      |
|    ep_rew_mean     | -96.5    |
| time/              |          |
|    fps             | 154      |
|    iterations      | 27       |
|    time_elapsed    | 357      |
|    total_timesteps | 55296    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -288        |
| time/                   |             |
|    total_timesteps      | 56000       |
| train/                  |             |
|    approx_kl            | 0.014852885 |
|    clip_fraction        | 0.194       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.14       |
|    explained_variance   | 0.802       |
|    learning_rate        | 0.0003      |
|    loss                 | 26.6        |
|    n_updates            | 270         |
|    policy_gradient_loss | -0.0149     |
|    value_loss           | 38.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -332     |
| time/              |          |
|    total_timesteps | 57000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 406      |
|    ep_rew_mean     | -94      |
| time/              |          |
|    fps             | 149      |
|    iterations      | 28       |
|    time_elapsed    | 383      |
|    total_timesteps | 57344    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 972         |
|    mean_reward          | -512        |
| time/                   |             |
|    total_timesteps      | 58000       |
| train/                  |             |
|    approx_kl            | 0.006886605 |
|    clip_fraction        | 0.0373      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.06       |
|    explained_variance   | 0.661       |
|    learning_rate        | 0.0003      |
|    loss                 | 37.2        |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.00712    |
|    value_loss           | 83.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 754      |
|    mean_reward     | -466     |
| time/              |          |
|    total_timesteps | 59000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 421      |
|    ep_rew_mean     | -91.4    |
| time/              |          |
|    fps             | 146      |
|    iterations      | 29       |
|    time_elapsed    | 405      |
|    total_timesteps | 59392    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 943         |
|    mean_reward          | -464        |
| time/                   |             |
|    total_timesteps      | 60000       |
| train/                  |             |
|    approx_kl            | 0.009201176 |
|    clip_fraction        | 0.0646      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.01       |
|    explained_variance   | 0.665       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.7        |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.00661    |
|    value_loss           | 37.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 980      |
|    mean_reward     | -468     |
| time/              |          |
|    total_timesteps | 61000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 439      |
|    ep_rew_mean     | -89.2    |
| time/              |          |
|    fps             | 143      |
|    iterations      | 30       |
|    time_elapsed    | 429      |
|    total_timesteps | 61440    |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 1e+03      |
|    mean_reward          | -513       |
| time/                   |            |
|    total_timesteps      | 62000      |
| train/                  |            |
|    approx_kl            | 0.01033192 |
|    clip_fraction        | 0.1        |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.966     |
|    explained_variance   | 0.63       |
|    learning_rate        | 0.0003     |
|    loss                 | 33.5       |
|    n_updates            | 300        |
|    policy_gradient_loss | -0.00833   |
|    value_loss           | 80.9       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -475     |
| time/              |          |
|    total_timesteps | 63000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 452      |
|    ep_rew_mean     | -91.6    |
| time/              |          |
|    fps             | 139      |
|    iterations      | 31       |
|    time_elapsed    | 453      |
|    total_timesteps | 63488    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -301        |
| time/                   |             |
|    total_timesteps      | 64000       |
| train/                  |             |
|    approx_kl            | 0.005611252 |
|    clip_fraction        | 0.0276      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.878      |
|    explained_variance   | 0.552       |
|    learning_rate        | 0.0003      |
|    loss                 | 42.4        |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.00619    |
|    value_loss           | 199         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -295     |
| time/              |          |
|    total_timesteps | 65000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 466      |
|    ep_rew_mean     | -85.2    |
| time/              |          |
|    fps             | 136      |
|    iterations      | 32       |
|    time_elapsed    | 480      |
|    total_timesteps | 65536    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | -299         |
| time/                   |              |
|    total_timesteps      | 66000        |
| train/                  |              |
|    approx_kl            | 0.0044276454 |
|    clip_fraction        | 0.0419       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | 0.83         |
|    learning_rate        | 0.0003       |
|    loss                 | 40           |
|    n_updates            | 320          |
|    policy_gradient_loss | -0.00415     |
|    value_loss           | 44.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -299     |
| time/              |          |
|    total_timesteps | 67000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 485      |
|    ep_rew_mean     | -78.3    |
| time/              |          |
|    fps             | 132      |
|    iterations      | 33       |
|    time_elapsed    | 508      |
|    total_timesteps | 67584    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -216        |
| time/                   |             |
|    total_timesteps      | 68000       |
| train/                  |             |
|    approx_kl            | 0.012084256 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.88       |
|    explained_variance   | 0.866       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.2        |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0109     |
|    value_loss           | 37.8        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -203     |
| time/              |          |
|    total_timesteps | 69000    |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 491      |
|    ep_rew_mean     | -76.9    |
| time/              |          |
|    fps             | 130      |
|    iterations      | 34       |
|    time_elapsed    | 534      |
|    total_timesteps | 69632    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | -331         |
| time/                   |              |
|    total_timesteps      | 70000        |
| train/                  |              |
|    approx_kl            | 0.0054237647 |
|    clip_fraction        | 0.0101       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.957       |
|    explained_variance   | 0.69         |
|    learning_rate        | 0.0003       |
|    loss                 | 29.4         |
|    n_updates            | 340          |
|    policy_gradient_loss | -0.00214     |
|    value_loss           | 79.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -293     |
| time/              |          |
|    total_timesteps | 71000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 514      |
|    ep_rew_mean     | -74.5    |
| time/              |          |
|    fps             | 128      |
|    iterations      | 35       |
|    time_elapsed    | 559      |
|    total_timesteps | 71680    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -219        |
| time/                   |             |
|    total_timesteps      | 72000       |
| train/                  |             |
|    approx_kl            | 0.011393597 |
|    clip_fraction        | 0.0951      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.952      |
|    explained_variance   | 0.903       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.24        |
|    n_updates            | 350         |
|    policy_gradient_loss | -0.00981    |
|    value_loss           | 14.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -230     |
| time/              |          |
|    total_timesteps | 73000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 532      |
|    ep_rew_mean     | -72.5    |
| time/              |          |
|    fps             | 125      |
|    iterations      | 36       |
|    time_elapsed    | 587      |
|    total_timesteps | 73728    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -176        |
| time/                   |             |
|    total_timesteps      | 74000       |
| train/                  |             |
|    approx_kl            | 0.008430514 |
|    clip_fraction        | 0.0968      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.03       |
|    explained_variance   | 0.866       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.16        |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.00657    |
|    value_loss           | 14          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -147     |
| time/              |          |
|    total_timesteps | 75000    |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 547      |
|    ep_rew_mean     | -68.5    |
| time/              |          |
|    fps             | 123      |
|    iterations      | 37       |
|    time_elapsed    | 616      |
|    total_timesteps | 75776    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -55.3       |
| time/                   |             |
|    total_timesteps      | 76000       |
| train/                  |             |
|    approx_kl            | 0.010760736 |
|    clip_fraction        | 0.07        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.01       |
|    explained_variance   | 0.913       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.1         |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.00684    |
|    value_loss           | 11.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -80.1    |
| time/              |          |
|    total_timesteps | 77000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 565      |
|    ep_rew_mean     | -64.2    |
| time/              |          |
|    fps             | 120      |
|    iterations      | 38       |
|    time_elapsed    | 644      |
|    total_timesteps | 77824    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -22.8       |
| time/                   |             |
|    total_timesteps      | 78000       |
| train/                  |             |
|    approx_kl            | 0.010576774 |
|    clip_fraction        | 0.0913      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1          |
|    explained_variance   | 0.926       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.24        |
|    n_updates            | 380         |
|    policy_gradient_loss | -0.00475    |
|    value_loss           | 10.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -45.3    |
| time/              |          |
|    total_timesteps | 79000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 586      |
|    ep_rew_mean     | -60.6    |
| time/              |          |
|    fps             | 118      |
|    iterations      | 39       |
|    time_elapsed    | 671      |
|    total_timesteps | 79872    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 978         |
|    mean_reward          | 11.5        |
| time/                   |             |
|    total_timesteps      | 80000       |
| train/                  |             |
|    approx_kl            | 0.008065812 |
|    clip_fraction        | 0.0523      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.95       |
|    explained_variance   | 0.802       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.25        |
|    n_updates            | 390         |
|    policy_gradient_loss | -0.00479    |
|    value_loss           | 39.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -17.3    |
| time/              |          |
|    total_timesteps | 81000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 597      |
|    ep_rew_mean     | -57.2    |
| time/              |          |
|    fps             | 117      |
|    iterations      | 40       |
|    time_elapsed    | 699      |
|    total_timesteps | 81920    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | -28.2        |
| time/                   |              |
|    total_timesteps      | 82000        |
| train/                  |              |
|    approx_kl            | 0.0056858687 |
|    clip_fraction        | 0.0509       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.01        |
|    explained_variance   | 0.89         |
|    learning_rate        | 0.0003       |
|    loss                 | 5.46         |
|    n_updates            | 400          |
|    policy_gradient_loss | -0.00379     |
|    value_loss           | 17.6         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | -30.9    |
| time/              |          |
|    total_timesteps | 83000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 608      |
|    ep_rew_mean     | -55.1    |
| time/              |          |
|    fps             | 115      |
|    iterations      | 41       |
|    time_elapsed    | 726      |
|    total_timesteps | 83968    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 990         |
|    mean_reward          | 5.55        |
| time/                   |             |
|    total_timesteps      | 84000       |
| train/                  |             |
|    approx_kl            | 0.006488357 |
|    clip_fraction        | 0.0833      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.951      |
|    explained_variance   | 0.645       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.4        |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.00384    |
|    value_loss           | 64.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 988      |
|    mean_reward     | 3.13     |
| time/              |          |
|    total_timesteps | 85000    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 961      |
|    mean_reward     | 18.7     |
| time/              |          |
|    total_timesteps | 86000    |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 638      |
|    ep_rew_mean     | -46.5    |
| time/              |          |
|    fps             | 112      |
|    iterations      | 42       |
|    time_elapsed    | 763      |
|    total_timesteps | 86016    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | -18.9       |
| time/                   |             |
|    total_timesteps      | 87000       |
| train/                  |             |
|    approx_kl            | 0.010028921 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.974      |
|    explained_variance   | 0.92        |
|    learning_rate        | 0.0003      |
|    loss                 | 2.3         |
|    n_updates            | 420         |
|    policy_gradient_loss | -0.00714    |
|    value_loss           | 5.33        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 981      |
|    mean_reward     | 16.5     |
| time/              |          |
|    total_timesteps | 88000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 654      |
|    ep_rew_mean     | -40.9    |
| time/              |          |
|    fps             | 111      |
|    iterations      | 43       |
|    time_elapsed    | 791      |
|    total_timesteps | 88064    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 897         |
|    mean_reward          | 112         |
| time/                   |             |
|    total_timesteps      | 89000       |
| train/                  |             |
|    approx_kl            | 0.010428283 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.964      |
|    explained_variance   | 0.925       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.1         |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.0102     |
|    value_loss           | 5.44        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 759      |
|    mean_reward     | 183      |
| time/              |          |
|    total_timesteps | 90000    |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 664      |
|    ep_rew_mean     | -36.1    |
| time/              |          |
|    fps             | 110      |
|    iterations      | 44       |
|    time_elapsed    | 813      |
|    total_timesteps | 90112    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 794          |
|    mean_reward          | 101          |
| time/                   |              |
|    total_timesteps      | 91000        |
| train/                  |              |
|    approx_kl            | 0.0047692945 |
|    clip_fraction        | 0.025        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.976       |
|    explained_variance   | 0.661        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.76         |
|    n_updates            | 440          |
|    policy_gradient_loss | -0.00145     |
|    value_loss           | 50.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 822      |
|    mean_reward     | 160      |
| time/              |          |
|    total_timesteps | 92000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 678      |
|    ep_rew_mean     | -29      |
| time/              |          |
|    fps             | 110      |
|    iterations      | 45       |
|    time_elapsed    | 836      |
|    total_timesteps | 92160    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 752         |
|    mean_reward          | 163         |
| time/                   |             |
|    total_timesteps      | 93000       |
| train/                  |             |
|    approx_kl            | 0.012287942 |
|    clip_fraction        | 0.0529      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.908      |
|    explained_variance   | 0.899       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.84        |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.00322    |
|    value_loss           | 17.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 706      |
|    mean_reward     | 124      |
| time/              |          |
|    total_timesteps | 94000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 689      |
|    ep_rew_mean     | -23.6    |
| time/              |          |
|    fps             | 110      |
|    iterations      | 46       |
|    time_elapsed    | 854      |
|    total_timesteps | 94208    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 880          |
|    mean_reward          | 123          |
| time/                   |              |
|    total_timesteps      | 95000        |
| train/                  |              |
|    approx_kl            | 0.0022836234 |
|    clip_fraction        | 0.00664      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.933       |
|    explained_variance   | 0.508        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.86         |
|    n_updates            | 460          |
|    policy_gradient_loss | -0.00131     |
|    value_loss           | 85.6         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 720      |
|    mean_reward     | 168      |
| time/              |          |
|    total_timesteps | 96000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 700      |
|    ep_rew_mean     | -22.6    |
| time/              |          |
|    fps             | 109      |
|    iterations      | 47       |
|    time_elapsed    | 875      |
|    total_timesteps | 96256    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 779         |
|    mean_reward          | 144         |
| time/                   |             |
|    total_timesteps      | 97000       |
| train/                  |             |
|    approx_kl            | 0.007104121 |
|    clip_fraction        | 0.0547      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.878      |
|    explained_variance   | 0.787       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.5        |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.0018     |
|    value_loss           | 43.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 742      |
|    mean_reward     | 152      |
| time/              |          |
|    total_timesteps | 98000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 723      |
|    ep_rew_mean     | -18.5    |
| time/              |          |
|    fps             | 109      |
|    iterations      | 48       |
|    time_elapsed    | 894      |
|    total_timesteps | 98304    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 633         |
|    mean_reward          | 150         |
| time/                   |             |
|    total_timesteps      | 99000       |
| train/                  |             |
|    approx_kl            | 0.008666029 |
|    clip_fraction        | 0.0612      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.903      |
|    explained_variance   | 0.904       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.22        |
|    n_updates            | 480         |
|    policy_gradient_loss | -0.00432    |
|    value_loss           | 8.87        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 664      |
|    mean_reward     | 190      |
| time/              |          |
|    total_timesteps | 100000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 737      |
|    ep_rew_mean     | -13.7    |
| time/              |          |
|    fps             | 110      |
|    iterations      | 49       |
|    time_elapsed    | 911      |
|    total_timesteps | 100352   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 611        |
|    mean_reward          | 186        |
| time/                   |            |
|    total_timesteps      | 101000     |
| train/                  |            |
|    approx_kl            | 0.00661172 |
|    clip_fraction        | 0.0894     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.877     |
|    explained_variance   | 0.919      |
|    learning_rate        | 0.0003     |
|    loss                 | 7.93       |
|    n_updates            | 490        |
|    policy_gradient_loss | -0.00288   |
|    value_loss           | 9.78       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 632      |
|    mean_reward     | 211      |
| time/              |          |
|    total_timesteps | 102000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 754      |
|    ep_rew_mean     | -9.65    |
| time/              |          |
|    fps             | 110      |
|    iterations      | 50       |
|    time_elapsed    | 929      |
|    total_timesteps | 102400   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 535         |
|    mean_reward          | 165         |
| time/                   |             |
|    total_timesteps      | 103000      |
| train/                  |             |
|    approx_kl            | 0.006258712 |
|    clip_fraction        | 0.053       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.826      |
|    explained_variance   | 0.967       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.06        |
|    n_updates            | 500         |
|    policy_gradient_loss | -0.00282    |
|    value_loss           | 2.94        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 586      |
|    mean_reward     | 138      |
| time/              |          |
|    total_timesteps | 104000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 758      |
|    ep_rew_mean     | -5.23    |
| time/              |          |
|    fps             | 110      |
|    iterations      | 51       |
|    time_elapsed    | 944      |
|    total_timesteps | 104448   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 689          |
|    mean_reward          | 112          |
| time/                   |              |
|    total_timesteps      | 105000       |
| train/                  |              |
|    approx_kl            | 0.0033253413 |
|    clip_fraction        | 0.0397       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.791       |
|    explained_variance   | 0.937        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.78         |
|    n_updates            | 510          |
|    policy_gradient_loss | -0.00325     |
|    value_loss           | 8.62         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 562      |
|    mean_reward     | 139      |
| time/              |          |
|    total_timesteps | 106000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 768      |
|    ep_rew_mean     | 1.18     |
| time/              |          |
|    fps             | 110      |
|    iterations      | 52       |
|    time_elapsed    | 961      |
|    total_timesteps | 106496   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 666         |
|    mean_reward          | 173         |
| time/                   |             |
|    total_timesteps      | 107000      |
| train/                  |             |
|    approx_kl            | 0.007794665 |
|    clip_fraction        | 0.0546      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.817      |
|    explained_variance   | 0.769       |
|    learning_rate        | 0.0003      |
|    loss                 | 49.4        |
|    n_updates            | 520         |
|    policy_gradient_loss | -0.00164    |
|    value_loss           | 43.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 727      |
|    mean_reward     | 130      |
| time/              |          |
|    total_timesteps | 108000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 775      |
|    ep_rew_mean     | 4.19     |
| time/              |          |
|    fps             | 110      |
|    iterations      | 53       |
|    time_elapsed    | 979      |
|    total_timesteps | 108544   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 741          |
|    mean_reward          | 85.9         |
| time/                   |              |
|    total_timesteps      | 109000       |
| train/                  |              |
|    approx_kl            | 0.0085768625 |
|    clip_fraction        | 0.0452       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.779       |
|    explained_variance   | 0.747        |
|    learning_rate        | 0.0003       |
|    loss                 | 15.2         |
|    n_updates            | 530          |
|    policy_gradient_loss | -0.00446     |
|    value_loss           | 88.6         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 701      |
|    mean_reward     | 36.6     |
| time/              |          |
|    total_timesteps | 110000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 786      |
|    ep_rew_mean     | 10.4     |
| time/              |          |
|    fps             | 110      |
|    iterations      | 54       |
|    time_elapsed    | 998      |
|    total_timesteps | 110592   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 749          |
|    mean_reward          | 99.9         |
| time/                   |              |
|    total_timesteps      | 111000       |
| train/                  |              |
|    approx_kl            | 0.0031157124 |
|    clip_fraction        | 0.0464       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.824       |
|    explained_variance   | 0.943        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.47         |
|    n_updates            | 540          |
|    policy_gradient_loss | -0.00377     |
|    value_loss           | 9.38         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 633      |
|    mean_reward     | 71.3     |
| time/              |          |
|    total_timesteps | 112000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 801      |
|    ep_rew_mean     | 13.6     |
| time/              |          |
|    fps             | 110      |
|    iterations      | 55       |
|    time_elapsed    | 1016     |
|    total_timesteps | 112640   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 636          |
|    mean_reward          | 172          |
| time/                   |              |
|    total_timesteps      | 113000       |
| train/                  |              |
|    approx_kl            | 0.0127428975 |
|    clip_fraction        | 0.12         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.77        |
|    explained_variance   | 0.964        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.19         |
|    n_updates            | 550          |
|    policy_gradient_loss | -0.00873     |
|    value_loss           | 4.73         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 630      |
|    mean_reward     | 188      |
| time/              |          |
|    total_timesteps | 114000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 812      |
|    ep_rew_mean     | 19.1     |
| time/              |          |
|    fps             | 111      |
|    iterations      | 56       |
|    time_elapsed    | 1033     |
|    total_timesteps | 114688   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 723         |
|    mean_reward          | 156         |
| time/                   |             |
|    total_timesteps      | 115000      |
| train/                  |             |
|    approx_kl            | 0.009116065 |
|    clip_fraction        | 0.112       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.858      |
|    explained_variance   | 0.937       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.15        |
|    n_updates            | 560         |
|    policy_gradient_loss | -0.00395    |
|    value_loss           | 9.77        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 666      |
|    mean_reward     | 161      |
| time/              |          |
|    total_timesteps | 116000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 805      |
|    ep_rew_mean     | 25.3     |
| time/              |          |
|    fps             | 111      |
|    iterations      | 57       |
|    time_elapsed    | 1051     |
|    total_timesteps | 116736   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 542          |
|    mean_reward          | 215          |
| time/                   |              |
|    total_timesteps      | 117000       |
| train/                  |              |
|    approx_kl            | 0.0030642236 |
|    clip_fraction        | 0.0466       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.906       |
|    explained_variance   | 0.783        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.18         |
|    n_updates            | 570          |
|    policy_gradient_loss | -0.00187     |
|    value_loss           | 22.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 644      |
|    mean_reward     | 169      |
| time/              |          |
|    total_timesteps | 118000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 821      |
|    ep_rew_mean     | 31.2     |
| time/              |          |
|    fps             | 111      |
|    iterations      | 58       |
|    time_elapsed    | 1065     |
|    total_timesteps | 118784   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 810         |
|    mean_reward          | 57.9        |
| time/                   |             |
|    total_timesteps      | 119000      |
| train/                  |             |
|    approx_kl            | 0.004828564 |
|    clip_fraction        | 0.0224      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.826      |
|    explained_variance   | 0.794       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.4        |
|    n_updates            | 580         |
|    policy_gradient_loss | -0.00199    |
|    value_loss           | 63.8        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 689      |
|    mean_reward     | 152      |
| time/              |          |
|    total_timesteps | 120000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 830      |
|    ep_rew_mean     | 32.8     |
| time/              |          |
|    fps             | 111      |
|    iterations      | 59       |
|    time_elapsed    | 1086     |
|    total_timesteps | 120832   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 645         |
|    mean_reward          | 186         |
| time/                   |             |
|    total_timesteps      | 121000      |
| train/                  |             |
|    approx_kl            | 0.008364737 |
|    clip_fraction        | 0.0757      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.89       |
|    explained_variance   | 0.93        |
|    learning_rate        | 0.0003      |
|    loss                 | 2.5         |
|    n_updates            | 590         |
|    policy_gradient_loss | -0.00289    |
|    value_loss           | 14.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 522      |
|    mean_reward     | 201      |
| time/              |          |
|    total_timesteps | 122000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 844      |
|    ep_rew_mean     | 35.9     |
| time/              |          |
|    fps             | 111      |
|    iterations      | 60       |
|    time_elapsed    | 1101     |
|    total_timesteps | 122880   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 574         |
|    mean_reward          | 181         |
| time/                   |             |
|    total_timesteps      | 123000      |
| train/                  |             |
|    approx_kl            | 0.009573888 |
|    clip_fraction        | 0.0581      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.873      |
|    explained_variance   | 0.683       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.99        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.00192    |
|    value_loss           | 69.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 551      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 124000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 858      |
|    ep_rew_mean     | 39.7     |
| time/              |          |
|    fps             | 111      |
|    iterations      | 61       |
|    time_elapsed    | 1117     |
|    total_timesteps | 124928   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 502         |
|    mean_reward          | 206         |
| time/                   |             |
|    total_timesteps      | 125000      |
| train/                  |             |
|    approx_kl            | 0.006304797 |
|    clip_fraction        | 0.0808      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.885      |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.397       |
|    n_updates            | 610         |
|    policy_gradient_loss | -0.000473   |
|    value_loss           | 3.35        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 513      |
|    mean_reward     | 157      |
| time/              |          |
|    total_timesteps | 126000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 865      |
|    ep_rew_mean     | 45.9     |
| time/              |          |
|    fps             | 112      |
|    iterations      | 62       |
|    time_elapsed    | 1131     |
|    total_timesteps | 126976   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 631          |
|    mean_reward          | 192          |
| time/                   |              |
|    total_timesteps      | 127000       |
| train/                  |              |
|    approx_kl            | 0.0030707638 |
|    clip_fraction        | 0.0286       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.86        |
|    explained_variance   | 0.808        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.62         |
|    n_updates            | 620          |
|    policy_gradient_loss | -0.00134     |
|    value_loss           | 40.3         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 464      |
|    mean_reward     | 214      |
| time/              |          |
|    total_timesteps | 128000   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 517      |
|    mean_reward     | 219      |
| time/              |          |
|    total_timesteps | 129000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 891      |
|    ep_rew_mean     | 52.3     |
| time/              |          |
|    fps             | 112      |
|    iterations      | 63       |
|    time_elapsed    | 1151     |
|    total_timesteps | 129024   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 424          |
|    mean_reward          | 169          |
| time/                   |              |
|    total_timesteps      | 130000       |
| train/                  |              |
|    approx_kl            | 0.0057708165 |
|    clip_fraction        | 0.0541       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.799       |
|    explained_variance   | 0.875        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.33         |
|    n_updates            | 630          |
|    policy_gradient_loss | -0.00426     |
|    value_loss           | 6.47         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 556      |
|    mean_reward     | 189      |
| time/              |          |
|    total_timesteps | 131000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 905      |
|    ep_rew_mean     | 56.1     |
| time/              |          |
|    fps             | 112      |
|    iterations      | 64       |
|    time_elapsed    | 1164     |
|    total_timesteps | 131072   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 482         |
|    mean_reward          | 163         |
| time/                   |             |
|    total_timesteps      | 132000      |
| train/                  |             |
|    approx_kl            | 0.006234444 |
|    clip_fraction        | 0.0476      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.813      |
|    explained_variance   | 0.984       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.676       |
|    n_updates            | 640         |
|    policy_gradient_loss | -0.00369    |
|    value_loss           | 1.98        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 566      |
|    mean_reward     | 130      |
| time/              |          |
|    total_timesteps | 133000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 916      |
|    ep_rew_mean     | 59       |
| time/              |          |
|    fps             | 112      |
|    iterations      | 65       |
|    time_elapsed    | 1179     |
|    total_timesteps | 133120   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 574        |
|    mean_reward          | 182        |
| time/                   |            |
|    total_timesteps      | 134000     |
| train/                  |            |
|    approx_kl            | 0.00742779 |
|    clip_fraction        | 0.0816     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.79      |
|    explained_variance   | 0.972      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.87       |
|    n_updates            | 650        |
|    policy_gradient_loss | -0.00397   |
|    value_loss           | 5          |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 717      |
|    mean_reward     | 168      |
| time/              |          |
|    total_timesteps | 135000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 910      |
|    ep_rew_mean     | 64.1     |
| time/              |          |
|    fps             | 112      |
|    iterations      | 66       |
|    time_elapsed    | 1196     |
|    total_timesteps | 135168   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 502         |
|    mean_reward          | 198         |
| time/                   |             |
|    total_timesteps      | 136000      |
| train/                  |             |
|    approx_kl            | 0.004681248 |
|    clip_fraction        | 0.0248      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.741      |
|    explained_variance   | 0.823       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.35        |
|    n_updates            | 660         |
|    policy_gradient_loss | -0.00348    |
|    value_loss           | 41.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 572      |
|    mean_reward     | 145      |
| time/              |          |
|    total_timesteps | 137000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 924      |
|    ep_rew_mean     | 68.9     |
| time/              |          |
|    fps             | 113      |
|    iterations      | 67       |
|    time_elapsed    | 1210     |
|    total_timesteps | 137216   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 474          |
|    mean_reward          | 152          |
| time/                   |              |
|    total_timesteps      | 138000       |
| train/                  |              |
|    approx_kl            | 0.0028025536 |
|    clip_fraction        | 0.0267       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.878       |
|    explained_variance   | 0.941        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.06         |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.000686    |
|    value_loss           | 9.36         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 469      |
|    mean_reward     | 213      |
| time/              |          |
|    total_timesteps | 139000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 929      |
|    ep_rew_mean     | 72.6     |
| time/              |          |
|    fps             | 113      |
|    iterations      | 68       |
|    time_elapsed    | 1224     |
|    total_timesteps | 139264   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 519         |
|    mean_reward          | 205         |
| time/                   |             |
|    total_timesteps      | 140000      |
| train/                  |             |
|    approx_kl            | 0.021403588 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.679      |
|    explained_variance   | 0.714       |
|    learning_rate        | 0.0003      |
|    loss                 | 30.1        |
|    n_updates            | 680         |
|    policy_gradient_loss | -0.00639    |
|    value_loss           | 58.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 514      |
|    mean_reward     | 201      |
| time/              |          |
|    total_timesteps | 141000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 943      |
|    ep_rew_mean     | 76.6     |
| time/              |          |
|    fps             | 114      |
|    iterations      | 69       |
|    time_elapsed    | 1238     |
|    total_timesteps | 141312   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 780         |
|    mean_reward          | 145         |
| time/                   |             |
|    total_timesteps      | 142000      |
| train/                  |             |
|    approx_kl            | 0.006804878 |
|    clip_fraction        | 0.0656      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.822      |
|    explained_variance   | 0.964       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.9         |
|    n_updates            | 690         |
|    policy_gradient_loss | -0.00305    |
|    value_loss           | 2.51        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 597      |
|    mean_reward     | 146      |
| time/              |          |
|    total_timesteps | 143000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 944      |
|    ep_rew_mean     | 80.1     |
| time/              |          |
|    fps             | 114      |
|    iterations      | 70       |
|    time_elapsed    | 1256     |
|    total_timesteps | 143360   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 456          |
|    mean_reward          | 167          |
| time/                   |              |
|    total_timesteps      | 144000       |
| train/                  |              |
|    approx_kl            | 0.0058301464 |
|    clip_fraction        | 0.0555       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.846       |
|    explained_variance   | 0.949        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.694        |
|    n_updates            | 700          |
|    policy_gradient_loss | -0.00366     |
|    value_loss           | 2.15         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 487      |
|    mean_reward     | 212      |
| time/              |          |
|    total_timesteps | 145000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 942      |
|    ep_rew_mean     | 88.2     |
| time/              |          |
|    fps             | 114      |
|    iterations      | 71       |
|    time_elapsed    | 1269     |
|    total_timesteps | 145408   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 650         |
|    mean_reward          | 175         |
| time/                   |             |
|    total_timesteps      | 146000      |
| train/                  |             |
|    approx_kl            | 0.011483617 |
|    clip_fraction        | 0.0797      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.878      |
|    explained_variance   | 0.973       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.4         |
|    n_updates            | 710         |
|    policy_gradient_loss | -0.00313    |
|    value_loss           | 3.9         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 574      |
|    mean_reward     | 209      |
| time/              |          |
|    total_timesteps | 147000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 934      |
|    ep_rew_mean     | 93.2     |
| time/              |          |
|    fps             | 114      |
|    iterations      | 72       |
|    time_elapsed    | 1285     |
|    total_timesteps | 147456   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 503        |
|    mean_reward          | 215        |
| time/                   |            |
|    total_timesteps      | 148000     |
| train/                  |            |
|    approx_kl            | 0.01943962 |
|    clip_fraction        | 0.0688     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.888     |
|    explained_variance   | 0.801      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.67       |
|    n_updates            | 720        |
|    policy_gradient_loss | -0.00349   |
|    value_loss           | 29.7       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 589      |
|    mean_reward     | 202      |
| time/              |          |
|    total_timesteps | 149000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 940      |
|    ep_rew_mean     | 97.9     |
| time/              |          |
|    fps             | 114      |
|    iterations      | 73       |
|    time_elapsed    | 1300     |
|    total_timesteps | 149504   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 566          |
|    mean_reward          | 204          |
| time/                   |              |
|    total_timesteps      | 150000       |
| train/                  |              |
|    approx_kl            | 0.0035903798 |
|    clip_fraction        | 0.0907       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.852       |
|    explained_variance   | 0.956        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.86         |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.00323     |
|    value_loss           | 4.39         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 462      |
|    mean_reward     | 220      |
| time/              |          |
|    total_timesteps | 151000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 940      |
|    ep_rew_mean     | 98.9     |
| time/              |          |
|    fps             | 115      |
|    iterations      | 74       |
|    time_elapsed    | 1314     |
|    total_timesteps | 151552   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 478          |
|    mean_reward          | 172          |
| time/                   |              |
|    total_timesteps      | 152000       |
| train/                  |              |
|    approx_kl            | 0.0071716188 |
|    clip_fraction        | 0.0656       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.923       |
|    explained_variance   | 0.985        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.546        |
|    n_updates            | 740          |
|    policy_gradient_loss | -0.00366     |
|    value_loss           | 1.25         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 505      |
|    mean_reward     | 213      |
| time/              |          |
|    total_timesteps | 153000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 933      |
|    ep_rew_mean     | 99.1     |
| time/              |          |
|    fps             | 115      |
|    iterations      | 75       |
|    time_elapsed    | 1328     |
|    total_timesteps | 153600   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 576          |
|    mean_reward          | 199          |
| time/                   |              |
|    total_timesteps      | 154000       |
| train/                  |              |
|    approx_kl            | 0.0061521814 |
|    clip_fraction        | 0.0734       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.908       |
|    explained_variance   | 0.722        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.1         |
|    n_updates            | 750          |
|    policy_gradient_loss | -0.00399     |
|    value_loss           | 64.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 475      |
|    mean_reward     | 226      |
| time/              |          |
|    total_timesteps | 155000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 940      |
|    ep_rew_mean     | 103      |
| time/              |          |
|    fps             | 115      |
|    iterations      | 76       |
|    time_elapsed    | 1343     |
|    total_timesteps | 155648   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 462         |
|    mean_reward          | 210         |
| time/                   |             |
|    total_timesteps      | 156000      |
| train/                  |             |
|    approx_kl            | 0.010662386 |
|    clip_fraction        | 0.0402      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.863      |
|    explained_variance   | 0.976       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.63        |
|    n_updates            | 760         |
|    policy_gradient_loss | -0.00447    |
|    value_loss           | 3.22        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 566      |
|    mean_reward     | 201      |
| time/              |          |
|    total_timesteps | 157000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 950      |
|    ep_rew_mean     | 111      |
| time/              |          |
|    fps             | 116      |
|    iterations      | 77       |
|    time_elapsed    | 1357     |
|    total_timesteps | 157696   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 541        |
|    mean_reward          | 199        |
| time/                   |            |
|    total_timesteps      | 158000     |
| train/                  |            |
|    approx_kl            | 0.01433799 |
|    clip_fraction        | 0.0807     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.843     |
|    explained_variance   | 0.907      |
|    learning_rate        | 0.0003     |
|    loss                 | 24.7       |
|    n_updates            | 770        |
|    policy_gradient_loss | -0.00908   |
|    value_loss           | 27.9       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 531      |
|    mean_reward     | 201      |
| time/              |          |
|    total_timesteps | 159000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 956      |
|    ep_rew_mean     | 117      |
| time/              |          |
|    fps             | 116      |
|    iterations      | 78       |
|    time_elapsed    | 1371     |
|    total_timesteps | 159744   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 538          |
|    mean_reward          | 203          |
| time/                   |              |
|    total_timesteps      | 160000       |
| train/                  |              |
|    approx_kl            | 0.0013860712 |
|    clip_fraction        | 0.00166      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.821       |
|    explained_variance   | 0.747        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.35         |
|    n_updates            | 780          |
|    policy_gradient_loss | -0.00218     |
|    value_loss           | 57.2         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 429      |
|    mean_reward     | 174      |
| time/              |          |
|    total_timesteps | 161000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 957      |
|    ep_rew_mean     | 121      |
| time/              |          |
|    fps             | 116      |
|    iterations      | 79       |
|    time_elapsed    | 1384     |
|    total_timesteps | 161792   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 420         |
|    mean_reward          | 182         |
| time/                   |             |
|    total_timesteps      | 162000      |
| train/                  |             |
|    approx_kl            | 0.003501931 |
|    clip_fraction        | 0.0172      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.764      |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.0003      |
|    loss                 | 61.6        |
|    n_updates            | 790         |
|    policy_gradient_loss | -0.00144    |
|    value_loss           | 45          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 570      |
|    mean_reward     | 196      |
| time/              |          |
|    total_timesteps | 163000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 961      |
|    ep_rew_mean     | 127      |
| time/              |          |
|    fps             | 117      |
|    iterations      | 80       |
|    time_elapsed    | 1398     |
|    total_timesteps | 163840   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 536         |
|    mean_reward          | 220         |
| time/                   |             |
|    total_timesteps      | 164000      |
| train/                  |             |
|    approx_kl            | 0.006029009 |
|    clip_fraction        | 0.0295      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.768      |
|    explained_variance   | 0.754       |
|    learning_rate        | 0.0003      |
|    loss                 | 30.6        |
|    n_updates            | 800         |
|    policy_gradient_loss | -0.00311    |
|    value_loss           | 68.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 409      |
|    mean_reward     | 174      |
| time/              |          |
|    total_timesteps | 165000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 966      |
|    ep_rew_mean     | 129      |
| time/              |          |
|    fps             | 117      |
|    iterations      | 81       |
|    time_elapsed    | 1411     |
|    total_timesteps | 165888   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 559          |
|    mean_reward          | 208          |
| time/                   |              |
|    total_timesteps      | 166000       |
| train/                  |              |
|    approx_kl            | 0.0036128252 |
|    clip_fraction        | 0.0408       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.714       |
|    explained_variance   | 0.956        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.5          |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.00309     |
|    value_loss           | 2.35         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 641      |
|    mean_reward     | 179      |
| time/              |          |
|    total_timesteps | 167000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 958      |
|    ep_rew_mean     | 134      |
| time/              |          |
|    fps             | 117      |
|    iterations      | 82       |
|    time_elapsed    | 1426     |
|    total_timesteps | 167936   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 410         |
|    mean_reward          | 185         |
| time/                   |             |
|    total_timesteps      | 168000      |
| train/                  |             |
|    approx_kl            | 0.005385774 |
|    clip_fraction        | 0.0496      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.716      |
|    explained_variance   | 0.768       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.68        |
|    n_updates            | 820         |
|    policy_gradient_loss | -0.00325    |
|    value_loss           | 75.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 516      |
|    mean_reward     | 169      |
| time/              |          |
|    total_timesteps | 169000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 953      |
|    ep_rew_mean     | 140      |
| time/              |          |
|    fps             | 118      |
|    iterations      | 83       |
|    time_elapsed    | 1439     |
|    total_timesteps | 169984   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 420          |
|    mean_reward          | 228          |
| time/                   |              |
|    total_timesteps      | 170000       |
| train/                  |              |
|    approx_kl            | 0.0039230213 |
|    clip_fraction        | 0.0361       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.709       |
|    explained_variance   | 0.609        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.21         |
|    n_updates            | 830          |
|    policy_gradient_loss | -0.00517     |
|    value_loss           | 77.2         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 424      |
|    mean_reward     | 235      |
| time/              |          |
|    total_timesteps | 171000   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 417      |
|    mean_reward     | 233      |
| time/              |          |
|    total_timesteps | 172000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 960      |
|    ep_rew_mean     | 144      |
| time/              |          |
|    fps             | 118      |
|    iterations      | 84       |
|    time_elapsed    | 1453     |
|    total_timesteps | 172032   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 405         |
|    mean_reward          | 235         |
| time/                   |             |
|    total_timesteps      | 173000      |
| train/                  |             |
|    approx_kl            | 0.007562228 |
|    clip_fraction        | 0.043       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.731      |
|    explained_variance   | 0.88        |
|    learning_rate        | 0.0003      |
|    loss                 | 5.66        |
|    n_updates            | 840         |
|    policy_gradient_loss | -0.00315    |
|    value_loss           | 41.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 391      |
|    mean_reward     | 198      |
| time/              |          |
|    total_timesteps | 174000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 953      |
|    ep_rew_mean     | 146      |
| time/              |          |
|    fps             | 118      |
|    iterations      | 85       |
|    time_elapsed    | 1465     |
|    total_timesteps | 174080   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 390          |
|    mean_reward          | 231          |
| time/                   |              |
|    total_timesteps      | 175000       |
| train/                  |              |
|    approx_kl            | 0.0032278656 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.699       |
|    explained_variance   | 0.908        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.8         |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.00163     |
|    value_loss           | 32           |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 383      |
|    mean_reward     | 230      |
| time/              |          |
|    total_timesteps | 176000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 950      |
|    ep_rew_mean     | 151      |
| time/              |          |
|    fps             | 119      |
|    iterations      | 86       |
|    time_elapsed    | 1476     |
|    total_timesteps | 176128   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 370          |
|    mean_reward          | 225          |
| time/                   |              |
|    total_timesteps      | 177000       |
| train/                  |              |
|    approx_kl            | 0.0073856255 |
|    clip_fraction        | 0.0413       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.731       |
|    explained_variance   | 0.502        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.3         |
|    n_updates            | 860          |
|    policy_gradient_loss | -0.00372     |
|    value_loss           | 35.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 379      |
|    mean_reward     | 177      |
| time/              |          |
|    total_timesteps | 178000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 950      |
|    ep_rew_mean     | 156      |
| time/              |          |
|    fps             | 119      |
|    iterations      | 87       |
|    time_elapsed    | 1486     |
|    total_timesteps | 178176   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 552          |
|    mean_reward          | 211          |
| time/                   |              |
|    total_timesteps      | 179000       |
| train/                  |              |
|    approx_kl            | 0.0093033435 |
|    clip_fraction        | 0.0893       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.733       |
|    explained_variance   | 0.697        |
|    learning_rate        | 0.0003       |
|    loss                 | 16.9         |
|    n_updates            | 870          |
|    policy_gradient_loss | -0.00294     |
|    value_loss           | 39.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 374      |
|    mean_reward     | 244      |
| time/              |          |
|    total_timesteps | 180000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 917      |
|    ep_rew_mean     | 162      |
| time/              |          |
|    fps             | 120      |
|    iterations      | 88       |
|    time_elapsed    | 1498     |
|    total_timesteps | 180224   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 356          |
|    mean_reward          | 195          |
| time/                   |              |
|    total_timesteps      | 181000       |
| train/                  |              |
|    approx_kl            | 0.0029510458 |
|    clip_fraction        | 0.0129       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.807       |
|    explained_variance   | 0.423        |
|    learning_rate        | 0.0003       |
|    loss                 | 94.4         |
|    n_updates            | 880          |
|    policy_gradient_loss | -0.00256     |
|    value_loss           | 130          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 353      |
|    mean_reward     | 250      |
| time/              |          |
|    total_timesteps | 182000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 927      |
|    ep_rew_mean     | 167      |
| time/              |          |
|    fps             | 120      |
|    iterations      | 89       |
|    time_elapsed    | 1509     |
|    total_timesteps | 182272   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 349         |
|    mean_reward          | 192         |
| time/                   |             |
|    total_timesteps      | 183000      |
| train/                  |             |
|    approx_kl            | 0.010321873 |
|    clip_fraction        | 0.0727      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.708      |
|    explained_variance   | 0.726       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.63        |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.00192    |
|    value_loss           | 24.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 323      |
|    mean_reward     | 40.7     |
| time/              |          |
|    total_timesteps | 184000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 901      |
|    ep_rew_mean     | 174      |
| time/              |          |
|    fps             | 121      |
|    iterations      | 90       |
|    time_elapsed    | 1519     |
|    total_timesteps | 184320   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 372         |
|    mean_reward          | 184         |
| time/                   |             |
|    total_timesteps      | 185000      |
| train/                  |             |
|    approx_kl            | 0.007419655 |
|    clip_fraction        | 0.0532      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.765      |
|    explained_variance   | 0.575       |
|    learning_rate        | 0.0003      |
|    loss                 | 151         |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.00554    |
|    value_loss           | 137         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 370      |
|    mean_reward     | 232      |
| time/              |          |
|    total_timesteps | 186000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 878      |
|    ep_rew_mean     | 176      |
| time/              |          |
|    fps             | 121      |
|    iterations      | 91       |
|    time_elapsed    | 1529     |
|    total_timesteps | 186368   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 561         |
|    mean_reward          | 202         |
| time/                   |             |
|    total_timesteps      | 187000      |
| train/                  |             |
|    approx_kl            | 0.003055512 |
|    clip_fraction        | 0.0252      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.739      |
|    explained_variance   | 0.436       |
|    learning_rate        | 0.0003      |
|    loss                 | 101         |
|    n_updates            | 910         |
|    policy_gradient_loss | -0.00341    |
|    value_loss           | 201         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 348      |
|    mean_reward     | 198      |
| time/              |          |
|    total_timesteps | 188000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 843      |
|    ep_rew_mean     | 187      |
| time/              |          |
|    fps             | 122      |
|    iterations      | 92       |
|    time_elapsed    | 1541     |
|    total_timesteps | 188416   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 367         |
|    mean_reward          | 231         |
| time/                   |             |
|    total_timesteps      | 189000      |
| train/                  |             |
|    approx_kl            | 0.008410644 |
|    clip_fraction        | 0.0694      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.699      |
|    explained_variance   | 0.618       |
|    learning_rate        | 0.0003      |
|    loss                 | 29.7        |
|    n_updates            | 920         |
|    policy_gradient_loss | -0.0062     |
|    value_loss           | 66.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 478      |
|    mean_reward     | 212      |
| time/              |          |
|    total_timesteps | 190000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 803      |
|    ep_rew_mean     | 193      |
| time/              |          |
|    fps             | 122      |
|    iterations      | 93       |
|    time_elapsed    | 1552     |
|    total_timesteps | 190464   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 363         |
|    mean_reward          | 231         |
| time/                   |             |
|    total_timesteps      | 191000      |
| train/                  |             |
|    approx_kl            | 0.006006679 |
|    clip_fraction        | 0.0644      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.767      |
|    explained_variance   | 0.637       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.1        |
|    n_updates            | 930         |
|    policy_gradient_loss | -0.00224    |
|    value_loss           | 57.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 350      |
|    mean_reward     | 238      |
| time/              |          |
|    total_timesteps | 192000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 765      |
|    ep_rew_mean     | 198      |
| time/              |          |
|    fps             | 123      |
|    iterations      | 94       |
|    time_elapsed    | 1562     |
|    total_timesteps | 192512   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 348         |
|    mean_reward          | 240         |
| time/                   |             |
|    total_timesteps      | 193000      |
| train/                  |             |
|    approx_kl            | 0.004542345 |
|    clip_fraction        | 0.0415      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.72       |
|    explained_variance   | 0.436       |
|    learning_rate        | 0.0003      |
|    loss                 | 43.9        |
|    n_updates            | 940         |
|    policy_gradient_loss | -0.00298    |
|    value_loss           | 231         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 356      |
|    mean_reward     | 160      |
| time/              |          |
|    total_timesteps | 194000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 760      |
|    ep_rew_mean     | 200      |
| time/              |          |
|    fps             | 123      |
|    iterations      | 95       |
|    time_elapsed    | 1573     |
|    total_timesteps | 194560   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 334          |
|    mean_reward          | 232          |
| time/                   |              |
|    total_timesteps      | 195000       |
| train/                  |              |
|    approx_kl            | 0.0053206272 |
|    clip_fraction        | 0.0506       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.707       |
|    explained_variance   | 0.776        |
|    learning_rate        | 0.0003       |
|    loss                 | 33           |
|    n_updates            | 950          |
|    policy_gradient_loss | -0.000223    |
|    value_loss           | 21.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 485      |
|    mean_reward     | 213      |
| time/              |          |
|    total_timesteps | 196000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 724      |
|    ep_rew_mean     | 212      |
| time/              |          |
|    fps             | 124      |
|    iterations      | 96       |
|    time_elapsed    | 1584     |
|    total_timesteps | 196608   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 535          |
|    mean_reward          | 222          |
| time/                   |              |
|    total_timesteps      | 197000       |
| train/                  |              |
|    approx_kl            | 0.0088729095 |
|    clip_fraction        | 0.0804       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.745       |
|    explained_variance   | 0.733        |
|    learning_rate        | 0.0003       |
|    loss                 | 39.8         |
|    n_updates            | 960          |
|    policy_gradient_loss | -0.00345     |
|    value_loss           | 71.8         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 280      |
|    mean_reward     | 89.5     |
| time/              |          |
|    total_timesteps | 198000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 693      |
|    ep_rew_mean     | 219      |
| time/              |          |
|    fps             | 124      |
|    iterations      | 97       |
|    time_elapsed    | 1594     |
|    total_timesteps | 198656   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 357          |
|    mean_reward          | 180          |
| time/                   |              |
|    total_timesteps      | 199000       |
| train/                  |              |
|    approx_kl            | 0.0035459944 |
|    clip_fraction        | 0.0236       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.723       |
|    explained_variance   | 0.471        |
|    learning_rate        | 0.0003       |
|    loss                 | 38.1         |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.0025      |
|    value_loss           | 105          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 322      |
|    mean_reward     | 189      |
| time/              |          |
|    total_timesteps | 200000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 679      |
|    ep_rew_mean     | 222      |
| time/              |          |
|    fps             | 124      |
|    iterations      | 98       |
|    time_elapsed    | 1605     |
|    total_timesteps | 200704   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 308          |
|    mean_reward          | 188          |
| time/                   |              |
|    total_timesteps      | 201000       |
| train/                  |              |
|    approx_kl            | 0.0047111106 |
|    clip_fraction        | 0.0506       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.695       |
|    explained_variance   | 0.621        |
|    learning_rate        | 0.0003       |
|    loss                 | 56.9         |
|    n_updates            | 980          |
|    policy_gradient_loss | -0.00973     |
|    value_loss           | 132          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 470      |
|    mean_reward     | 227      |
| time/              |          |
|    total_timesteps | 202000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 668      |
|    ep_rew_mean     | 221      |
| time/              |          |
|    fps             | 125      |
|    iterations      | 99       |
|    time_elapsed    | 1616     |
|    total_timesteps | 202752   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 464         |
|    mean_reward          | 215         |
| time/                   |             |
|    total_timesteps      | 203000      |
| train/                  |             |
|    approx_kl            | 0.013630116 |
|    clip_fraction        | 0.163       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.701      |
|    explained_variance   | 0.761       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.88        |
|    n_updates            | 990         |
|    policy_gradient_loss | -0.0061     |
|    value_loss           | 42.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 509      |
|    mean_reward     | 148      |
| time/              |          |
|    total_timesteps | 204000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 658      |
|    ep_rew_mean     | 223      |
| time/              |          |
|    fps             | 125      |
|    iterations      | 100      |
|    time_elapsed    | 1629     |
|    total_timesteps | 204800   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 295         |
|    mean_reward          | 252         |
| time/                   |             |
|    total_timesteps      | 205000      |
| train/                  |             |
|    approx_kl            | 0.012264257 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.727      |
|    explained_variance   | 0.851       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.8         |
|    n_updates            | 1000        |
|    policy_gradient_loss | -0.00273    |
|    value_loss           | 31.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 447      |
|    mean_reward     | 169      |
| time/              |          |
|    total_timesteps | 206000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 635      |
|    ep_rew_mean     | 227      |
| time/              |          |
|    fps             | 126      |
|    iterations      | 101      |
|    time_elapsed    | 1640     |
|    total_timesteps | 206848   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 324         |
|    mean_reward          | 240         |
| time/                   |             |
|    total_timesteps      | 207000      |
| train/                  |             |
|    approx_kl            | 0.004314027 |
|    clip_fraction        | 0.0451      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.697      |
|    explained_variance   | 0.766       |
|    learning_rate        | 0.0003      |
|    loss                 | 47.1        |
|    n_updates            | 1010        |
|    policy_gradient_loss | -0.00248    |
|    value_loss           | 89.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 458      |
|    mean_reward     | 213      |
| time/              |          |
|    total_timesteps | 208000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 586      |
|    ep_rew_mean     | 223      |
| time/              |          |
|    fps             | 126      |
|    iterations      | 102      |
|    time_elapsed    | 1650     |
|    total_timesteps | 208896   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 456          |
|    mean_reward          | 209          |
| time/                   |              |
|    total_timesteps      | 209000       |
| train/                  |              |
|    approx_kl            | 0.0032995997 |
|    clip_fraction        | 0.0153       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.794       |
|    explained_variance   | 0.497        |
|    learning_rate        | 0.0003       |
|    loss                 | 150          |
|    n_updates            | 1020         |
|    policy_gradient_loss | -0.00148     |
|    value_loss           | 343          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 295      |
|    mean_reward     | 194      |
| time/              |          |
|    total_timesteps | 210000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 539      |
|    ep_rew_mean     | 232      |
| time/              |          |
|    fps             | 126      |
|    iterations      | 103      |
|    time_elapsed    | 1661     |
|    total_timesteps | 210944   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 302         |
|    mean_reward          | 244         |
| time/                   |             |
|    total_timesteps      | 211000      |
| train/                  |             |
|    approx_kl            | 0.006399306 |
|    clip_fraction        | 0.0665      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.633      |
|    explained_variance   | 0.762       |
|    learning_rate        | 0.0003      |
|    loss                 | 42.9        |
|    n_updates            | 1030        |
|    policy_gradient_loss | -0.00388    |
|    value_loss           | 140         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 364      |
|    mean_reward     | 101      |
| time/              |          |
|    total_timesteps | 212000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 512      |
|    ep_rew_mean     | 235      |
| time/              |          |
|    fps             | 127      |
|    iterations      | 104      |
|    time_elapsed    | 1671     |
|    total_timesteps | 212992   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 317         |
|    mean_reward          | 260         |
| time/                   |             |
|    total_timesteps      | 213000      |
| train/                  |             |
|    approx_kl            | 0.006041942 |
|    clip_fraction        | 0.0443      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.693      |
|    explained_variance   | 0.66        |
|    learning_rate        | 0.0003      |
|    loss                 | 40.2        |
|    n_updates            | 1040        |
|    policy_gradient_loss | -0.00261    |
|    value_loss           | 142         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 462      |
|    mean_reward     | 166      |
| time/              |          |
|    total_timesteps | 214000   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 450      |
|    mean_reward     | 224      |
| time/              |          |
|    total_timesteps | 215000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 472      |
|    ep_rew_mean     | 232      |
| time/              |          |
|    fps             | 127      |
|    iterations      | 105      |
|    time_elapsed    | 1684     |
|    total_timesteps | 215040   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 339          |
|    mean_reward          | 230          |
| time/                   |              |
|    total_timesteps      | 216000       |
| train/                  |              |
|    approx_kl            | 0.0027542347 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.719       |
|    explained_variance   | 0.506        |
|    learning_rate        | 0.0003       |
|    loss                 | 59.4         |
|    n_updates            | 1050         |
|    policy_gradient_loss | -0.00213     |
|    value_loss           | 210          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 301      |
|    mean_reward     | 253      |
| time/              |          |
|    total_timesteps | 217000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 425      |
|    ep_rew_mean     | 217      |
| time/              |          |
|    fps             | 128      |
|    iterations      | 106      |
|    time_elapsed    | 1694     |
|    total_timesteps | 217088   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 294         |
|    mean_reward          | 187         |
| time/                   |             |
|    total_timesteps      | 218000      |
| train/                  |             |
|    approx_kl            | 0.004898799 |
|    clip_fraction        | 0.037       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.795      |
|    explained_variance   | 0.716       |
|    learning_rate        | 0.0003      |
|    loss                 | 113         |
|    n_updates            | 1060        |
|    policy_gradient_loss | -0.00282    |
|    value_loss           | 244         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 319      |
|    mean_reward     | 239      |
| time/              |          |
|    total_timesteps | 219000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 404      |
|    ep_rew_mean     | 216      |
| time/              |          |
|    fps             | 128      |
|    iterations      | 107      |
|    time_elapsed    | 1703     |
|    total_timesteps | 219136   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 309         |
|    mean_reward          | 194         |
| time/                   |             |
|    total_timesteps      | 220000      |
| train/                  |             |
|    approx_kl            | 0.005494898 |
|    clip_fraction        | 0.0351      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.689      |
|    explained_variance   | 0.66        |
|    learning_rate        | 0.0003      |
|    loss                 | 56.4        |
|    n_updates            | 1070        |
|    policy_gradient_loss | -0.00162    |
|    value_loss           | 129         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 323      |
|    mean_reward     | 234      |
| time/              |          |
|    total_timesteps | 221000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 385      |
|    ep_rew_mean     | 212      |
| time/              |          |
|    fps             | 129      |
|    iterations      | 108      |
|    time_elapsed    | 1712     |
|    total_timesteps | 221184   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 346         |
|    mean_reward          | 189         |
| time/                   |             |
|    total_timesteps      | 222000      |
| train/                  |             |
|    approx_kl            | 0.005241768 |
|    clip_fraction        | 0.0771      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.781      |
|    explained_variance   | 0.644       |
|    learning_rate        | 0.0003      |
|    loss                 | 25.6        |
|    n_updates            | 1080        |
|    policy_gradient_loss | -0.0049     |
|    value_loss           | 145         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 406      |
|    mean_reward     | 206      |
| time/              |          |
|    total_timesteps | 223000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 391      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 129      |
|    iterations      | 109      |
|    time_elapsed    | 1723     |
|    total_timesteps | 223232   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 365         |
|    mean_reward          | 175         |
| time/                   |             |
|    total_timesteps      | 224000      |
| train/                  |             |
|    approx_kl            | 0.009969266 |
|    clip_fraction        | 0.0877      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.702      |
|    explained_variance   | 0.759       |
|    learning_rate        | 0.0003      |
|    loss                 | 36.3        |
|    n_updates            | 1090        |
|    policy_gradient_loss | -0.00398    |
|    value_loss           | 88.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 352      |
|    mean_reward     | 238      |
| time/              |          |
|    total_timesteps | 225000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 386      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 129      |
|    iterations      | 110      |
|    time_elapsed    | 1733     |
|    total_timesteps | 225280   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 315          |
|    mean_reward          | 256          |
| time/                   |              |
|    total_timesteps      | 226000       |
| train/                  |              |
|    approx_kl            | 0.0039551146 |
|    clip_fraction        | 0.0556       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.717       |
|    explained_variance   | 0.705        |
|    learning_rate        | 0.0003       |
|    loss                 | 17.3         |
|    n_updates            | 1100         |
|    policy_gradient_loss | -0.0027      |
|    value_loss           | 131          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 306      |
|    mean_reward     | 262      |
| time/              |          |
|    total_timesteps | 227000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 387      |
|    ep_rew_mean     | 212      |
| time/              |          |
|    fps             | 130      |
|    iterations      | 111      |
|    time_elapsed    | 1742     |
|    total_timesteps | 227328   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 426         |
|    mean_reward          | 102         |
| time/                   |             |
|    total_timesteps      | 228000      |
| train/                  |             |
|    approx_kl            | 0.007924998 |
|    clip_fraction        | 0.0792      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.761      |
|    explained_variance   | 0.736       |
|    learning_rate        | 0.0003      |
|    loss                 | 32.2        |
|    n_updates            | 1110        |
|    policy_gradient_loss | -0.0028     |
|    value_loss           | 82.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 296      |
|    mean_reward     | 199      |
| time/              |          |
|    total_timesteps | 229000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 395      |
|    ep_rew_mean     | 215      |
| time/              |          |
|    fps             | 130      |
|    iterations      | 112      |
|    time_elapsed    | 1753     |
|    total_timesteps | 229376   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 322          |
|    mean_reward          | 249          |
| time/                   |              |
|    total_timesteps      | 230000       |
| train/                  |              |
|    approx_kl            | 0.0066433176 |
|    clip_fraction        | 0.0817       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.72        |
|    explained_variance   | 0.73         |
|    learning_rate        | 0.0003       |
|    loss                 | 18.6         |
|    n_updates            | 1120         |
|    policy_gradient_loss | -0.00526     |
|    value_loss           | 59.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 295      |
|    mean_reward     | 240      |
| time/              |          |
|    total_timesteps | 231000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 393      |
|    ep_rew_mean     | 216      |
| time/              |          |
|    fps             | 131      |
|    iterations      | 113      |
|    time_elapsed    | 1762     |
|    total_timesteps | 231424   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 350         |
|    mean_reward          | 238         |
| time/                   |             |
|    total_timesteps      | 232000      |
| train/                  |             |
|    approx_kl            | 0.008414761 |
|    clip_fraction        | 0.0846      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.73       |
|    explained_variance   | 0.827       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.5        |
|    n_updates            | 1130        |
|    policy_gradient_loss | -0.00451    |
|    value_loss           | 44          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 327      |
|    mean_reward     | 206      |
| time/              |          |
|    total_timesteps | 233000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 390      |
|    ep_rew_mean     | 216      |
| time/              |          |
|    fps             | 131      |
|    iterations      | 114      |
|    time_elapsed    | 1772     |
|    total_timesteps | 233472   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 357         |
|    mean_reward          | 185         |
| time/                   |             |
|    total_timesteps      | 234000      |
| train/                  |             |
|    approx_kl            | 0.004886287 |
|    clip_fraction        | 0.0424      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.754      |
|    explained_variance   | 0.721       |
|    learning_rate        | 0.0003      |
|    loss                 | 99.2        |
|    n_updates            | 1140        |
|    policy_gradient_loss | -0.00165    |
|    value_loss           | 112         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 348      |
|    mean_reward     | 199      |
| time/              |          |
|    total_timesteps | 235000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 390      |
|    ep_rew_mean     | 215      |
| time/              |          |
|    fps             | 132      |
|    iterations      | 115      |
|    time_elapsed    | 1782     |
|    total_timesteps | 235520   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 508          |
|    mean_reward          | 194          |
| time/                   |              |
|    total_timesteps      | 236000       |
| train/                  |              |
|    approx_kl            | 0.0049134707 |
|    clip_fraction        | 0.0431       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.705       |
|    explained_variance   | 0.739        |
|    learning_rate        | 0.0003       |
|    loss                 | 21.2         |
|    n_updates            | 1150         |
|    policy_gradient_loss | -0.00093     |
|    value_loss           | 85.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 438      |
|    mean_reward     | 162      |
| time/              |          |
|    total_timesteps | 237000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 380      |
|    ep_rew_mean     | 216      |
| time/              |          |
|    fps             | 132      |
|    iterations      | 116      |
|    time_elapsed    | 1794     |
|    total_timesteps | 237568   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 326         |
|    mean_reward          | 256         |
| time/                   |             |
|    total_timesteps      | 238000      |
| train/                  |             |
|    approx_kl            | 0.009439999 |
|    clip_fraction        | 0.0662      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.665      |
|    explained_variance   | 0.646       |
|    learning_rate        | 0.0003      |
|    loss                 | 32.9        |
|    n_updates            | 1160        |
|    policy_gradient_loss | -0.00922    |
|    value_loss           | 105         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 284      |
|    mean_reward     | 151      |
| time/              |          |
|    total_timesteps | 239000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 362      |
|    ep_rew_mean     | 220      |
| time/              |          |
|    fps             | 132      |
|    iterations      | 117      |
|    time_elapsed    | 1803     |
|    total_timesteps | 239616   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 314          |
|    mean_reward          | 154          |
| time/                   |              |
|    total_timesteps      | 240000       |
| train/                  |              |
|    approx_kl            | 0.0064168805 |
|    clip_fraction        | 0.0723       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.727       |
|    explained_variance   | 0.83         |
|    learning_rate        | 0.0003       |
|    loss                 | 11.5         |
|    n_updates            | 1170         |
|    policy_gradient_loss | -0.00336     |
|    value_loss           | 47.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 291      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 241000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 361      |
|    ep_rew_mean     | 221      |
| time/              |          |
|    fps             | 133      |
|    iterations      | 118      |
|    time_elapsed    | 1813     |
|    total_timesteps | 241664   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 291         |
|    mean_reward          | 233         |
| time/                   |             |
|    total_timesteps      | 242000      |
| train/                  |             |
|    approx_kl            | 0.003251621 |
|    clip_fraction        | 0.0252      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.675      |
|    explained_variance   | 0.786       |
|    learning_rate        | 0.0003      |
|    loss                 | 172         |
|    n_updates            | 1180        |
|    policy_gradient_loss | -0.000824   |
|    value_loss           | 109         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 446      |
|    mean_reward     | 222      |
| time/              |          |
|    total_timesteps | 243000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 360      |
|    ep_rew_mean     | 220      |
| time/              |          |
|    fps             | 133      |
|    iterations      | 119      |
|    time_elapsed    | 1823     |
|    total_timesteps | 243712   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 270          |
|    mean_reward          | 214          |
| time/                   |              |
|    total_timesteps      | 244000       |
| train/                  |              |
|    approx_kl            | 0.0051771207 |
|    clip_fraction        | 0.0207       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.793       |
|    explained_variance   | 0.729        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.49         |
|    n_updates            | 1190         |
|    policy_gradient_loss | -0.00265     |
|    value_loss           | 102          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 325      |
|    mean_reward     | 190      |
| time/              |          |
|    total_timesteps | 245000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 371      |
|    ep_rew_mean     | 233      |
| time/              |          |
|    fps             | 134      |
|    iterations      | 120      |
|    time_elapsed    | 1832     |
|    total_timesteps | 245760   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 308         |
|    mean_reward          | 258         |
| time/                   |             |
|    total_timesteps      | 246000      |
| train/                  |             |
|    approx_kl            | 0.012244673 |
|    clip_fraction        | 0.0785      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.722      |
|    explained_variance   | 0.7         |
|    learning_rate        | 0.0003      |
|    loss                 | 20.8        |
|    n_updates            | 1200        |
|    policy_gradient_loss | -0.00882    |
|    value_loss           | 50.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 281      |
|    mean_reward     | 246      |
| time/              |          |
|    total_timesteps | 247000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 373      |
|    ep_rew_mean     | 234      |
| time/              |          |
|    fps             | 134      |
|    iterations      | 121      |
|    time_elapsed    | 1842     |
|    total_timesteps | 247808   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 319          |
|    mean_reward          | 253          |
| time/                   |              |
|    total_timesteps      | 248000       |
| train/                  |              |
|    approx_kl            | 0.0056452896 |
|    clip_fraction        | 0.0365       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.711       |
|    explained_variance   | 0.694        |
|    learning_rate        | 0.0003       |
|    loss                 | 38.1         |
|    n_updates            | 1210         |
|    policy_gradient_loss | -0.0056      |
|    value_loss           | 144          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 300      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 249000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 378      |
|    ep_rew_mean     | 235      |
| time/              |          |
|    fps             | 134      |
|    iterations      | 122      |
|    time_elapsed    | 1851     |
|    total_timesteps | 249856   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 273          |
|    mean_reward          | 201          |
| time/                   |              |
|    total_timesteps      | 250000       |
| train/                  |              |
|    approx_kl            | 0.0061212573 |
|    clip_fraction        | 0.0712       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.662       |
|    explained_variance   | 0.862        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.3         |
|    n_updates            | 1220         |
|    policy_gradient_loss | -0.00277     |
|    value_loss           | 42.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 292      |
|    mean_reward     | 180      |
| time/              |          |
|    total_timesteps | 251000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 385      |
|    ep_rew_mean     | 241      |
| time/              |          |
|    fps             | 135      |
|    iterations      | 123      |
|    time_elapsed    | 1860     |
|    total_timesteps | 251904   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 441         |
|    mean_reward          | 225         |
| time/                   |             |
|    total_timesteps      | 252000      |
| train/                  |             |
|    approx_kl            | 0.008456837 |
|    clip_fraction        | 0.0896      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.692      |
|    explained_variance   | 0.884       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.77        |
|    n_updates            | 1230        |
|    policy_gradient_loss | -0.00463    |
|    value_loss           | 29.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 273      |
|    mean_reward     | 266      |
| time/              |          |
|    total_timesteps | 253000   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 389      |
|    ep_rew_mean     | 248      |
| time/              |          |
|    fps             | 135      |
|    iterations      | 124      |
|    time_elapsed    | 1871     |
|    total_timesteps | 253952   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 258          |
|    mean_reward          | 250          |
| time/                   |              |
|    total_timesteps      | 254000       |
| train/                  |              |
|    approx_kl            | 0.0066335937 |
|    clip_fraction        | 0.0521       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.645       |
|    explained_variance   | 0.845        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.15         |
|    n_updates            | 1240         |
|    policy_gradient_loss | -0.00144     |
|    value_loss           | 32.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 313      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 255000   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 271      |
|    mean_reward     | 256      |
| time/              |          |
|    total_timesteps | 256000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 402      |
|    ep_rew_mean     | 254      |
| time/              |          |
|    fps             | 135      |
|    iterations      | 125      |
|    time_elapsed    | 1882     |
|    total_timesteps | 256000   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 256          |
|    mean_reward          | 217          |
| time/                   |              |
|    total_timesteps      | 257000       |
| train/                  |              |
|    approx_kl            | 0.0062195137 |
|    clip_fraction        | 0.0656       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.676       |
|    explained_variance   | 0.889        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.71         |
|    n_updates            | 1250         |
|    policy_gradient_loss | -0.00555     |
|    value_loss           | 57.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 279      |
|    mean_reward     | 249      |
| time/              |          |
|    total_timesteps | 258000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 411      |
|    ep_rew_mean     | 257      |
| time/              |          |
|    fps             | 136      |
|    iterations      | 126      |
|    time_elapsed    | 1891     |
|    total_timesteps | 258048   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 261          |
|    mean_reward          | 260          |
| time/                   |              |
|    total_timesteps      | 259000       |
| train/                  |              |
|    approx_kl            | 0.0046809614 |
|    clip_fraction        | 0.0384       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.64        |
|    explained_variance   | 0.774        |
|    learning_rate        | 0.0003       |
|    loss                 | 57.9         |
|    n_updates            | 1260         |
|    policy_gradient_loss | -0.00432     |
|    value_loss           | 187          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 262      |
|    mean_reward     | 257      |
| time/              |          |
|    total_timesteps | 260000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 408      |
|    ep_rew_mean     | 255      |
| time/              |          |
|    fps             | 136      |
|    iterations      | 127      |
|    time_elapsed    | 1900     |
|    total_timesteps | 260096   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 265          |
|    mean_reward          | 168          |
| time/                   |              |
|    total_timesteps      | 261000       |
| train/                  |              |
|    approx_kl            | 0.0050825328 |
|    clip_fraction        | 0.0189       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.678       |
|    explained_variance   | 0.71         |
|    learning_rate        | 0.0003       |
|    loss                 | 89.1         |
|    n_updates            | 1270         |
|    policy_gradient_loss | -0.00147     |
|    value_loss           | 122          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 558      |
|    mean_reward     | 160      |
| time/              |          |
|    total_timesteps | 262000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 417      |
|    ep_rew_mean     | 258      |
| time/              |          |
|    fps             | 137      |
|    iterations      | 128      |
|    time_elapsed    | 1911     |
|    total_timesteps | 262144   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 247          |
|    mean_reward          | 159          |
| time/                   |              |
|    total_timesteps      | 263000       |
| train/                  |              |
|    approx_kl            | 0.0055016493 |
|    clip_fraction        | 0.0459       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.651       |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.0003       |
|    loss                 | 20.6         |
|    n_updates            | 1280         |
|    policy_gradient_loss | -0.0035      |
|    value_loss           | 51.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 282      |
|    mean_reward     | 264      |
| time/              |          |
|    total_timesteps | 264000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 409      |
|    ep_rew_mean     | 258      |
| time/              |          |
|    fps             | 137      |
|    iterations      | 129      |
|    time_elapsed    | 1920     |
|    total_timesteps | 264192   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 300         |
|    mean_reward          | 269         |
| time/                   |             |
|    total_timesteps      | 265000      |
| train/                  |             |
|    approx_kl            | 0.003764097 |
|    clip_fraction        | 0.0376      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.666      |
|    explained_variance   | 0.7         |
|    learning_rate        | 0.0003      |
|    loss                 | 38.1        |
|    n_updates            | 1290        |
|    policy_gradient_loss | -0.0026     |
|    value_loss           | 169         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 247      |
|    mean_reward     | 208      |
| time/              |          |
|    total_timesteps | 266000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 415      |
|    ep_rew_mean     | 258      |
| time/              |          |
|    fps             | 137      |
|    iterations      | 130      |
|    time_elapsed    | 1929     |
|    total_timesteps | 266240   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 407         |
|    mean_reward          | 210         |
| time/                   |             |
|    total_timesteps      | 267000      |
| train/                  |             |
|    approx_kl            | 0.005685869 |
|    clip_fraction        | 0.0571      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.773      |
|    explained_variance   | 0.889       |
|    learning_rate        | 0.0003      |
|    loss                 | 39.7        |
|    n_updates            | 1300        |
|    policy_gradient_loss | -0.00144    |
|    value_loss           | 92.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 417      |
|    mean_reward     | 209      |
| time/              |          |
|    total_timesteps | 268000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 416      |
|    ep_rew_mean     | 261      |
| time/              |          |
|    fps             | 138      |
|    iterations      | 131      |
|    time_elapsed    | 1940     |
|    total_timesteps | 268288   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 548          |
|    mean_reward          | 202          |
| time/                   |              |
|    total_timesteps      | 269000       |
| train/                  |              |
|    approx_kl            | 0.0051461877 |
|    clip_fraction        | 0.0552       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.712       |
|    explained_variance   | 0.947        |
|    learning_rate        | 0.0003       |
|    loss                 | 36.2         |
|    n_updates            | 1310         |
|    policy_gradient_loss | -0.00419     |
|    value_loss           | 39.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 472      |
|    mean_reward     | 230      |
| time/              |          |
|    total_timesteps | 270000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 409      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 138      |
|    iterations      | 132      |
|    time_elapsed    | 1953     |
|    total_timesteps | 270336   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 433         |
|    mean_reward          | 232         |
| time/                   |             |
|    total_timesteps      | 271000      |
| train/                  |             |
|    approx_kl            | 0.011096987 |
|    clip_fraction        | 0.0662      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.679      |
|    explained_variance   | 0.881       |
|    learning_rate        | 0.0003      |
|    loss                 | 34.1        |
|    n_updates            | 1320        |
|    policy_gradient_loss | -0.00606    |
|    value_loss           | 78.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 279      |
|    mean_reward     | 258      |
| time/              |          |
|    total_timesteps | 272000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 398      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 138      |
|    iterations      | 133      |
|    time_elapsed    | 1963     |
|    total_timesteps | 272384   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 263          |
|    mean_reward          | 248          |
| time/                   |              |
|    total_timesteps      | 273000       |
| train/                  |              |
|    approx_kl            | 0.0024086114 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.675       |
|    explained_variance   | 0.763        |
|    learning_rate        | 0.0003       |
|    loss                 | 81.1         |
|    n_updates            | 1330         |
|    policy_gradient_loss | -0.00152     |
|    value_loss           | 158          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 565      |
|    mean_reward     | 206      |
| time/              |          |
|    total_timesteps | 274000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 404      |
|    ep_rew_mean     | 259      |
| time/              |          |
|    fps             | 139      |
|    iterations      | 134      |
|    time_elapsed    | 1974     |
|    total_timesteps | 274432   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 252        |
|    mean_reward          | 220        |
| time/                   |            |
|    total_timesteps      | 275000     |
| train/                  |            |
|    approx_kl            | 0.01148812 |
|    clip_fraction        | 0.112      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.735     |
|    explained_variance   | 0.803      |
|    learning_rate        | 0.0003     |
|    loss                 | 120        |
|    n_updates            | 1340       |
|    policy_gradient_loss | -0.00314   |
|    value_loss           | 128        |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 261      |
|    mean_reward     | 201      |
| time/              |          |
|    total_timesteps | 276000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 398      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 139      |
|    iterations      | 135      |
|    time_elapsed    | 1982     |
|    total_timesteps | 276480   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 281         |
|    mean_reward          | 198         |
| time/                   |             |
|    total_timesteps      | 277000      |
| train/                  |             |
|    approx_kl            | 0.005786931 |
|    clip_fraction        | 0.0556      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.678      |
|    explained_variance   | 0.736       |
|    learning_rate        | 0.0003      |
|    loss                 | 25.5        |
|    n_updates            | 1350        |
|    policy_gradient_loss | -0.00258    |
|    value_loss           | 107         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 450      |
|    mean_reward     | 241      |
| time/              |          |
|    total_timesteps | 278000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 392      |
|    ep_rew_mean     | 257      |
| time/              |          |
|    fps             | 139      |
|    iterations      | 136      |
|    time_elapsed    | 1992     |
|    total_timesteps | 278528   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 251          |
|    mean_reward          | 206          |
| time/                   |              |
|    total_timesteps      | 279000       |
| train/                  |              |
|    approx_kl            | 0.0030233557 |
|    clip_fraction        | 0.03         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.622       |
|    explained_variance   | 0.684        |
|    learning_rate        | 0.0003       |
|    loss                 | 35           |
|    n_updates            | 1360         |
|    policy_gradient_loss | -0.00176     |
|    value_loss           | 116          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 260      |
|    mean_reward     | 258      |
| time/              |          |
|    total_timesteps | 280000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 386      |
|    ep_rew_mean     | 261      |
| time/              |          |
|    fps             | 140      |
|    iterations      | 137      |
|    time_elapsed    | 2001     |
|    total_timesteps | 280576   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 299         |
|    mean_reward          | 265         |
| time/                   |             |
|    total_timesteps      | 281000      |
| train/                  |             |
|    approx_kl            | 0.004149641 |
|    clip_fraction        | 0.0302      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.61       |
|    explained_variance   | 0.845       |
|    learning_rate        | 0.0003      |
|    loss                 | 57.2        |
|    n_updates            | 1370        |
|    policy_gradient_loss | -0.00209    |
|    value_loss           | 71.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 237      |
|    mean_reward     | 200      |
| time/              |          |
|    total_timesteps | 282000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 374      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 140      |
|    iterations      | 138      |
|    time_elapsed    | 2009     |
|    total_timesteps | 282624   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 311         |
|    mean_reward          | 265         |
| time/                   |             |
|    total_timesteps      | 283000      |
| train/                  |             |
|    approx_kl            | 0.010773829 |
|    clip_fraction        | 0.0785      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.597      |
|    explained_variance   | 0.795       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.9        |
|    n_updates            | 1380        |
|    policy_gradient_loss | -0.00559    |
|    value_loss           | 87.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 239      |
|    mean_reward     | 255      |
| time/              |          |
|    total_timesteps | 284000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 385      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 141      |
|    iterations      | 139      |
|    time_elapsed    | 2018     |
|    total_timesteps | 284672   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 397          |
|    mean_reward          | 182          |
| time/                   |              |
|    total_timesteps      | 285000       |
| train/                  |              |
|    approx_kl            | 0.0036868113 |
|    clip_fraction        | 0.048        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.498       |
|    explained_variance   | 0.779        |
|    learning_rate        | 0.0003       |
|    loss                 | 53           |
|    n_updates            | 1390         |
|    policy_gradient_loss | -0.0045      |
|    value_loss           | 111          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 391      |
|    mean_reward     | 191      |
| time/              |          |
|    total_timesteps | 286000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 385      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 141      |
|    iterations      | 140      |
|    time_elapsed    | 2029     |
|    total_timesteps | 286720   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 365         |
|    mean_reward          | 200         |
| time/                   |             |
|    total_timesteps      | 287000      |
| train/                  |             |
|    approx_kl            | 0.003872625 |
|    clip_fraction        | 0.0394      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.558      |
|    explained_variance   | 0.922       |
|    learning_rate        | 0.0003      |
|    loss                 | 27.2        |
|    n_updates            | 1400        |
|    policy_gradient_loss | -0.00194    |
|    value_loss           | 71.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 256      |
|    mean_reward     | 260      |
| time/              |          |
|    total_timesteps | 288000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 384      |
|    ep_rew_mean     | 254      |
| time/              |          |
|    fps             | 141      |
|    iterations      | 141      |
|    time_elapsed    | 2039     |
|    total_timesteps | 288768   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 247         |
|    mean_reward          | 253         |
| time/                   |             |
|    total_timesteps      | 289000      |
| train/                  |             |
|    approx_kl            | 0.009342207 |
|    clip_fraction        | 0.0691      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.615      |
|    explained_variance   | 0.86        |
|    learning_rate        | 0.0003      |
|    loss                 | 20.2        |
|    n_updates            | 1410        |
|    policy_gradient_loss | -0.00333    |
|    value_loss           | 125         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 245      |
|    mean_reward     | 250      |
| time/              |          |
|    total_timesteps | 290000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 377      |
|    ep_rew_mean     | 253      |
| time/              |          |
|    fps             | 142      |
|    iterations      | 142      |
|    time_elapsed    | 2047     |
|    total_timesteps | 290816   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 419         |
|    mean_reward          | 241         |
| time/                   |             |
|    total_timesteps      | 291000      |
| train/                  |             |
|    approx_kl            | 0.008346245 |
|    clip_fraction        | 0.064       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.642      |
|    explained_variance   | 0.895       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.83        |
|    n_updates            | 1420        |
|    policy_gradient_loss | -0.00389    |
|    value_loss           | 43          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 415      |
|    mean_reward     | 241      |
| time/              |          |
|    total_timesteps | 292000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 356      |
|    ep_rew_mean     | 253      |
| time/              |          |
|    fps             | 142      |
|    iterations      | 143      |
|    time_elapsed    | 2058     |
|    total_timesteps | 292864   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 277         |
|    mean_reward          | 256         |
| time/                   |             |
|    total_timesteps      | 293000      |
| train/                  |             |
|    approx_kl            | 0.004132923 |
|    clip_fraction        | 0.063       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | 0.782       |
|    learning_rate        | 0.0003      |
|    loss                 | 26.6        |
|    n_updates            | 1430        |
|    policy_gradient_loss | -0.00389    |
|    value_loss           | 157         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 287      |
|    mean_reward     | 263      |
| time/              |          |
|    total_timesteps | 294000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 352      |
|    ep_rew_mean     | 258      |
| time/              |          |
|    fps             | 142      |
|    iterations      | 144      |
|    time_elapsed    | 2066     |
|    total_timesteps | 294912   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 266          |
|    mean_reward          | 225          |
| time/                   |              |
|    total_timesteps      | 295000       |
| train/                  |              |
|    approx_kl            | 0.0074491748 |
|    clip_fraction        | 0.0677       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.657       |
|    explained_variance   | 0.805        |
|    learning_rate        | 0.0003       |
|    loss                 | 17.8         |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.00252     |
|    value_loss           | 34.8         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 426      |
|    mean_reward     | 240      |
| time/              |          |
|    total_timesteps | 296000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 340      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 143      |
|    iterations      | 145      |
|    time_elapsed    | 2076     |
|    total_timesteps | 296960   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 269          |
|    mean_reward          | 261          |
| time/                   |              |
|    total_timesteps      | 297000       |
| train/                  |              |
|    approx_kl            | 0.0042551877 |
|    clip_fraction        | 0.0362       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.619       |
|    explained_variance   | 0.801        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.06         |
|    n_updates            | 1450         |
|    policy_gradient_loss | -0.0033      |
|    value_loss           | 76.2         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 328      |
|    mean_reward     | 253      |
| time/              |          |
|    total_timesteps | 298000   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 402      |
|    mean_reward     | 194      |
| time/              |          |
|    total_timesteps | 299000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 329      |
|    ep_rew_mean     | 260      |
| time/              |          |
|    fps             | 143      |
|    iterations      | 146      |
|    time_elapsed    | 2087     |
|    total_timesteps | 299008   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 238          |
|    mean_reward          | 226          |
| time/                   |              |
|    total_timesteps      | 300000       |
| train/                  |              |
|    approx_kl            | 0.0039451467 |
|    clip_fraction        | 0.035        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.615       |
|    explained_variance   | 0.777        |
|    learning_rate        | 0.0003       |
|    loss                 | 46.1         |
|    n_updates            | 1460         |
|    policy_gradient_loss | -0.00163     |
|    value_loss           | 128          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 264      |
|    mean_reward     | 264      |
| time/              |          |
|    total_timesteps | 301000   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 322      |
|    ep_rew_mean     | 254      |
| time/              |          |
|    fps             | 143      |
|    iterations      | 147      |
|    time_elapsed    | 2096     |
|    total_timesteps | 301056   |
---------------------------------


In [4]:

# Load the trained agent
# NOTE: if you have loading issue, you can pass `print_system_info=True`
# to compare the system on which the model was trained vs the current one
# model = DQN.load("dqn_lunar", env=env, print_system_info=True)
env = gym.make("LunarLander-v2", render_mode="rgb_array")
model = PPO.load("./lunar_lander_logs/best_model.zip", env=env)

# Evaluate the agent
# NOTE: If you use wrappers with your environment that modify rewards,
#       this will be reflected here. To evaluate with original rewards,
#       wrap environment in a "Monitor" wrapper before other wrappers.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [5]:
mean_reward

260.5610057

In [6]:
# Enjoy trained agent
total_reward = 0
vec_env = model.get_env()
obs = vec_env.reset()
for i in range(10_000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    total_reward += rewards

    vec_env.render("human")

print(f"Reward: {total_reward}")
vec_env.close()

Reward: [10197.532]


In [14]:
model = PPO.load('./lunar_lander_logs/best_model.zip')
model.set_env(env=env)
# Train the agent and display a progress bar
model.learn(
    total_timesteps=int(300_000), 
    progress_bar=True,
    callback=eval_callback
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./logs/PPO_10


---------------------------------
| eval/              |          |
|    mean_ep_length  | 244      |
|    mean_reward     | 163      |
| time/              |          |
|    total_timesteps | 944      |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 555      |
|    mean_reward     | 156      |
| time/              |          |
|    total_timesteps | 1944     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 292      |
|    ep_rew_mean     | 243      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 1        |
|    time_elapsed    | 9        |
|    total_timesteps | 2048     |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 262          |
|    mean_reward          | 217          |
| time/                   |              |
|    total_timesteps      | 2944         |
| train/                  |              |
|    approx_kl            | 0.0043002493 |
|    clip_fraction        | 0.0431       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.701       |
|    explained_variance   | 0.856        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.5         |
|    n_updates            | 1300         |
|    policy_gradient_loss | -0.00356     |
|    value_loss           | 109          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 308      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 3944     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 471      |
|    ep_rew_mean     | 240      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 2        |
|    time_elapsed    | 18       |
|    total_timesteps | 4096     |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 264          |
|    mean_reward          | 195          |
| time/                   |              |
|    total_timesteps      | 4944         |
| train/                  |              |
|    approx_kl            | 0.0044693886 |
|    clip_fraction        | 0.0729       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.702       |
|    explained_variance   | 0.966        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.17         |
|    n_updates            | 1310         |
|    policy_gradient_loss | -0.00149     |
|    value_loss           | 23.3         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 255      |
|    mean_reward     | 206      |
| time/              |          |
|    total_timesteps | 5944     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 423      |
|    ep_rew_mean     | 228      |
| time/              |          |
|    fps             | 226      |
|    iterations      | 3        |
|    time_elapsed    | 27       |
|    total_timesteps | 6144     |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 402          |
|    mean_reward          | 220          |
| time/                   |              |
|    total_timesteps      | 6944         |
| train/                  |              |
|    approx_kl            | 0.0029360577 |
|    clip_fraction        | 0.0359       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.68        |
|    explained_variance   | 0.762        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.3         |
|    n_updates            | 1320         |
|    policy_gradient_loss | -0.0038      |
|    value_loss           | 147          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 284      |
|    mean_reward     | 199      |
| time/              |          |
|    total_timesteps | 7944     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 467      |
|    ep_rew_mean     | 225      |
| time/              |          |
|    fps             | 214      |
|    iterations      | 4        |
|    time_elapsed    | 38       |
|    total_timesteps | 8192     |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 249         |
|    mean_reward          | 161         |
| time/                   |             |
|    total_timesteps      | 8944        |
| train/                  |             |
|    approx_kl            | 0.008546751 |
|    clip_fraction        | 0.0863      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.69       |
|    explained_variance   | 0.934       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.8        |
|    n_updates            | 1330        |
|    policy_gradient_loss | -0.00482    |
|    value_loss           | 48.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 254      |
|    mean_reward     | 249      |
| time/              |          |
|    total_timesteps | 9944     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 425      |
|    ep_rew_mean     | 211      |
| time/              |          |
|    fps             | 218      |
|    iterations      | 5        |
|    time_elapsed    | 46       |
|    total_timesteps | 10240    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 416         |
|    mean_reward          | 175         |
| time/                   |             |
|    total_timesteps      | 10944       |
| train/                  |             |
|    approx_kl            | 0.006936902 |
|    clip_fraction        | 0.0419      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.727      |
|    explained_variance   | 0.784       |
|    learning_rate        | 0.0003      |
|    loss                 | 129         |
|    n_updates            | 1340        |
|    policy_gradient_loss | -0.00403    |
|    value_loss           | 195         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 418      |
|    mean_reward     | 220      |
| time/              |          |
|    total_timesteps | 11944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 435      |
|    ep_rew_mean     | 228      |
| time/              |          |
|    fps             | 211      |
|    iterations      | 6        |
|    time_elapsed    | 58       |
|    total_timesteps | 12288    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 245          |
|    mean_reward          | 198          |
| time/                   |              |
|    total_timesteps      | 12944        |
| train/                  |              |
|    approx_kl            | 0.0048977304 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.669       |
|    explained_variance   | 0.873        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.3         |
|    n_updates            | 1350         |
|    policy_gradient_loss | -0.000728    |
|    value_loss           | 58.2         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 266      |
|    mean_reward     | 206      |
| time/              |          |
|    total_timesteps | 13944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 405      |
|    ep_rew_mean     | 237      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 7        |
|    time_elapsed    | 67       |
|    total_timesteps | 14336    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 410         |
|    mean_reward          | 173         |
| time/                   |             |
|    total_timesteps      | 14944       |
| train/                  |             |
|    approx_kl            | 0.004159119 |
|    clip_fraction        | 0.055       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.682      |
|    explained_variance   | 0.771       |
|    learning_rate        | 0.0003      |
|    loss                 | 21.4        |
|    n_updates            | 1360        |
|    policy_gradient_loss | -0.00314    |
|    value_loss           | 65.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 312      |
|    mean_reward     | 266      |
| time/              |          |
|    total_timesteps | 15944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 378      |
|    ep_rew_mean     | 230      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 8        |
|    time_elapsed    | 77       |
|    total_timesteps | 16384    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 410         |
|    mean_reward          | 224         |
| time/                   |             |
|    total_timesteps      | 16944       |
| train/                  |             |
|    approx_kl            | 0.004328034 |
|    clip_fraction        | 0.0422      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.654      |
|    explained_variance   | 0.758       |
|    learning_rate        | 0.0003      |
|    loss                 | 78.1        |
|    n_updates            | 1370        |
|    policy_gradient_loss | -0.00514    |
|    value_loss           | 159         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 316      |
|    mean_reward     | 246      |
| time/              |          |
|    total_timesteps | 17944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 370      |
|    ep_rew_mean     | 229      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 9        |
|    time_elapsed    | 87       |
|    total_timesteps | 18432    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 337          |
|    mean_reward          | 212          |
| time/                   |              |
|    total_timesteps      | 18944        |
| train/                  |              |
|    approx_kl            | 0.0032914462 |
|    clip_fraction        | 0.0243       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.615       |
|    explained_variance   | 0.679        |
|    learning_rate        | 0.0003       |
|    loss                 | 39.4         |
|    n_updates            | 1380         |
|    policy_gradient_loss | -0.00392     |
|    value_loss           | 147          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 373      |
|    mean_reward     | 239      |
| time/              |          |
|    total_timesteps | 19944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 393      |
|    ep_rew_mean     | 236      |
| time/              |          |
|    fps             | 209      |
|    iterations      | 10       |
|    time_elapsed    | 97       |
|    total_timesteps | 20480    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 408          |
|    mean_reward          | 173          |
| time/                   |              |
|    total_timesteps      | 20944        |
| train/                  |              |
|    approx_kl            | 0.0070554083 |
|    clip_fraction        | 0.059        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.649       |
|    explained_variance   | 0.911        |
|    learning_rate        | 0.0003       |
|    loss                 | 10.3         |
|    n_updates            | 1390         |
|    policy_gradient_loss | -0.00321     |
|    value_loss           | 17.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 462      |
|    mean_reward     | 181      |
| time/              |          |
|    total_timesteps | 21944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 399      |
|    ep_rew_mean     | 235      |
| time/              |          |
|    fps             | 206      |
|    iterations      | 11       |
|    time_elapsed    | 109      |
|    total_timesteps | 22528    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 256         |
|    mean_reward          | 209         |
| time/                   |             |
|    total_timesteps      | 22944       |
| train/                  |             |
|    approx_kl            | 0.013284808 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.752      |
|    explained_variance   | 0.968       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.6         |
|    n_updates            | 1400        |
|    policy_gradient_loss | -0.00293    |
|    value_loss           | 13.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 247      |
|    mean_reward     | 154      |
| time/              |          |
|    total_timesteps | 23944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 405      |
|    ep_rew_mean     | 233      |
| time/              |          |
|    fps             | 207      |
|    iterations      | 12       |
|    time_elapsed    | 118      |
|    total_timesteps | 24576    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 400         |
|    mean_reward          | 191         |
| time/                   |             |
|    total_timesteps      | 24944       |
| train/                  |             |
|    approx_kl            | 0.005172138 |
|    clip_fraction        | 0.0331      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.655      |
|    explained_variance   | 0.943       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.25        |
|    n_updates            | 1410        |
|    policy_gradient_loss | -0.00237    |
|    value_loss           | 34.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 269      |
|    mean_reward     | 251      |
| time/              |          |
|    total_timesteps | 25944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 422      |
|    ep_rew_mean     | 229      |
| time/              |          |
|    fps             | 206      |
|    iterations      | 13       |
|    time_elapsed    | 128      |
|    total_timesteps | 26624    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 270          |
|    mean_reward          | 214          |
| time/                   |              |
|    total_timesteps      | 26944        |
| train/                  |              |
|    approx_kl            | 0.0065880083 |
|    clip_fraction        | 0.0483       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.656       |
|    explained_variance   | 0.845        |
|    learning_rate        | 0.0003       |
|    loss                 | 83.3         |
|    n_updates            | 1420         |
|    policy_gradient_loss | -0.00299     |
|    value_loss           | 89.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 268      |
|    mean_reward     | 253      |
| time/              |          |
|    total_timesteps | 27944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 406      |
|    ep_rew_mean     | 227      |
| time/              |          |
|    fps             | 208      |
|    iterations      | 14       |
|    time_elapsed    | 137      |
|    total_timesteps | 28672    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 287          |
|    mean_reward          | 262          |
| time/                   |              |
|    total_timesteps      | 28944        |
| train/                  |              |
|    approx_kl            | 0.0054227556 |
|    clip_fraction        | 0.0522       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.688       |
|    explained_variance   | 0.77         |
|    learning_rate        | 0.0003       |
|    loss                 | 35.4         |
|    n_updates            | 1430         |
|    policy_gradient_loss | -0.00312     |
|    value_loss           | 114          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 401      |
|    mean_reward     | 170      |
| time/              |          |
|    total_timesteps | 29944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 402      |
|    ep_rew_mean     | 225      |
| time/              |          |
|    fps             | 208      |
|    iterations      | 15       |
|    time_elapsed    | 147      |
|    total_timesteps | 30720    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 251          |
|    mean_reward          | 254          |
| time/                   |              |
|    total_timesteps      | 30944        |
| train/                  |              |
|    approx_kl            | 0.0075318487 |
|    clip_fraction        | 0.0857       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.67        |
|    explained_variance   | 0.921        |
|    learning_rate        | 0.0003       |
|    loss                 | 84.1         |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.00163     |
|    value_loss           | 93.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 285      |
|    mean_reward     | 196      |
| time/              |          |
|    total_timesteps | 31944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 410      |
|    ep_rew_mean     | 226      |
| time/              |          |
|    fps             | 209      |
|    iterations      | 16       |
|    time_elapsed    | 156      |
|    total_timesteps | 32768    |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 260        |
|    mean_reward          | 199        |
| time/                   |            |
|    total_timesteps      | 32944      |
| train/                  |            |
|    approx_kl            | 0.00626255 |
|    clip_fraction        | 0.0983     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.623     |
|    explained_variance   | 0.861      |
|    learning_rate        | 0.0003     |
|    loss                 | 6.86       |
|    n_updates            | 1450       |
|    policy_gradient_loss | -0.0026    |
|    value_loss           | 28.5       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 405      |
|    mean_reward     | 173      |
| time/              |          |
|    total_timesteps | 33944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 414      |
|    ep_rew_mean     | 230      |
| time/              |          |
|    fps             | 208      |
|    iterations      | 17       |
|    time_elapsed    | 166      |
|    total_timesteps | 34816    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 241          |
|    mean_reward          | 156          |
| time/                   |              |
|    total_timesteps      | 34944        |
| train/                  |              |
|    approx_kl            | 0.0037898382 |
|    clip_fraction        | 0.0277       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.543       |
|    explained_variance   | 0.823        |
|    learning_rate        | 0.0003       |
|    loss                 | 41.2         |
|    n_updates            | 1460         |
|    policy_gradient_loss | -0.00332     |
|    value_loss           | 67.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 289      |
|    mean_reward     | 210      |
| time/              |          |
|    total_timesteps | 35944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 417      |
|    ep_rew_mean     | 234      |
| time/              |          |
|    fps             | 209      |
|    iterations      | 18       |
|    time_elapsed    | 176      |
|    total_timesteps | 36864    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 444         |
|    mean_reward          | 236         |
| time/                   |             |
|    total_timesteps      | 36944       |
| train/                  |             |
|    approx_kl            | 0.005068245 |
|    clip_fraction        | 0.0559      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.642      |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.8        |
|    n_updates            | 1470        |
|    policy_gradient_loss | -0.00409    |
|    value_loss           | 48.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 262      |
|    mean_reward     | 252      |
| time/              |          |
|    total_timesteps | 37944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 411      |
|    ep_rew_mean     | 238      |
| time/              |          |
|    fps             | 209      |
|    iterations      | 19       |
|    time_elapsed    | 185      |
|    total_timesteps | 38912    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 250         |
|    mean_reward          | 199         |
| time/                   |             |
|    total_timesteps      | 38944       |
| train/                  |             |
|    approx_kl            | 0.004958395 |
|    clip_fraction        | 0.0695      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.647      |
|    explained_variance   | 0.806       |
|    learning_rate        | 0.0003      |
|    loss                 | 10          |
|    n_updates            | 1480        |
|    policy_gradient_loss | -0.000619   |
|    value_loss           | 41.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 411      |
|    mean_reward     | 139      |
| time/              |          |
|    total_timesteps | 39944    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 212      |
|    mean_reward     | 64       |
| time/              |          |
|    total_timesteps | 40944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 401      |
|    ep_rew_mean     | 234      |
| time/              |          |
|    fps             | 208      |
|    iterations      | 20       |
|    time_elapsed    | 196      |
|    total_timesteps | 40960    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 418          |
|    mean_reward          | 181          |
| time/                   |              |
|    total_timesteps      | 41944        |
| train/                  |              |
|    approx_kl            | 0.0015294247 |
|    clip_fraction        | 0.02         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.648       |
|    explained_variance   | 0.381        |
|    learning_rate        | 0.0003       |
|    loss                 | 34.1         |
|    n_updates            | 1490         |
|    policy_gradient_loss | -0.000782    |
|    value_loss           | 217          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 273      |
|    mean_reward     | 243      |
| time/              |          |
|    total_timesteps | 42944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 384      |
|    ep_rew_mean     | 234      |
| time/              |          |
|    fps             | 208      |
|    iterations      | 21       |
|    time_elapsed    | 206      |
|    total_timesteps | 43008    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 255         |
|    mean_reward          | 157         |
| time/                   |             |
|    total_timesteps      | 43944       |
| train/                  |             |
|    approx_kl            | 0.005675695 |
|    clip_fraction        | 0.0222      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.636      |
|    explained_variance   | 0.706       |
|    learning_rate        | 0.0003      |
|    loss                 | 87.2        |
|    n_updates            | 1500        |
|    policy_gradient_loss | -0.00199    |
|    value_loss           | 125         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 260      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 44944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 383      |
|    ep_rew_mean     | 235      |
| time/              |          |
|    fps             | 209      |
|    iterations      | 22       |
|    time_elapsed    | 214      |
|    total_timesteps | 45056    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 399         |
|    mean_reward          | 208         |
| time/                   |             |
|    total_timesteps      | 45944       |
| train/                  |             |
|    approx_kl            | 0.005558254 |
|    clip_fraction        | 0.0543      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.669      |
|    explained_variance   | 0.886       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.32        |
|    n_updates            | 1510        |
|    policy_gradient_loss | -0.00311    |
|    value_loss           | 64.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 262      |
|    mean_reward     | 239      |
| time/              |          |
|    total_timesteps | 46944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 381      |
|    ep_rew_mean     | 242      |
| time/              |          |
|    fps             | 209      |
|    iterations      | 23       |
|    time_elapsed    | 224      |
|    total_timesteps | 47104    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 233         |
|    mean_reward          | 205         |
| time/                   |             |
|    total_timesteps      | 47944       |
| train/                  |             |
|    approx_kl            | 0.005262672 |
|    clip_fraction        | 0.0602      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.621      |
|    explained_variance   | 0.913       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.71        |
|    n_updates            | 1520        |
|    policy_gradient_loss | -0.0053     |
|    value_loss           | 28.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 264      |
|    mean_reward     | 248      |
| time/              |          |
|    total_timesteps | 48944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 383      |
|    ep_rew_mean     | 240      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 24       |
|    time_elapsed    | 233      |
|    total_timesteps | 49152    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 387         |
|    mean_reward          | 129         |
| time/                   |             |
|    total_timesteps      | 49944       |
| train/                  |             |
|    approx_kl            | 0.006168884 |
|    clip_fraction        | 0.0579      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.73       |
|    explained_variance   | 0.874       |
|    learning_rate        | 0.0003      |
|    loss                 | 51.5        |
|    n_updates            | 1530        |
|    policy_gradient_loss | -0.00146    |
|    value_loss           | 97.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 271      |
|    mean_reward     | 248      |
| time/              |          |
|    total_timesteps | 50944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 384      |
|    ep_rew_mean     | 234      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 25       |
|    time_elapsed    | 243      |
|    total_timesteps | 51200    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 261         |
|    mean_reward          | 208         |
| time/                   |             |
|    total_timesteps      | 51944       |
| train/                  |             |
|    approx_kl            | 0.006959538 |
|    clip_fraction        | 0.0955      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.781      |
|    explained_variance   | 0.877       |
|    learning_rate        | 0.0003      |
|    loss                 | 80.9        |
|    n_updates            | 1540        |
|    policy_gradient_loss | -0.00232    |
|    value_loss           | 72.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 426      |
|    mean_reward     | 177      |
| time/              |          |
|    total_timesteps | 52944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 383      |
|    ep_rew_mean     | 232      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 26       |
|    time_elapsed    | 253      |
|    total_timesteps | 53248    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 285         |
|    mean_reward          | 212         |
| time/                   |             |
|    total_timesteps      | 53944       |
| train/                  |             |
|    approx_kl            | 0.005638239 |
|    clip_fraction        | 0.0373      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.609      |
|    explained_variance   | 0.809       |
|    learning_rate        | 0.0003      |
|    loss                 | 43.1        |
|    n_updates            | 1550        |
|    policy_gradient_loss | -0.00305    |
|    value_loss           | 118         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 378      |
|    mean_reward     | 206      |
| time/              |          |
|    total_timesteps | 54944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 380      |
|    ep_rew_mean     | 232      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 27       |
|    time_elapsed    | 262      |
|    total_timesteps | 55296    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 559         |
|    mean_reward          | 203         |
| time/                   |             |
|    total_timesteps      | 55944       |
| train/                  |             |
|    approx_kl            | 0.009441911 |
|    clip_fraction        | 0.0677      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.62       |
|    explained_variance   | 0.888       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.83        |
|    n_updates            | 1560        |
|    policy_gradient_loss | -0.00698    |
|    value_loss           | 76.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 256      |
|    mean_reward     | 248      |
| time/              |          |
|    total_timesteps | 56944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 367      |
|    ep_rew_mean     | 227      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 28       |
|    time_elapsed    | 272      |
|    total_timesteps | 57344    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 218         |
|    mean_reward          | 153         |
| time/                   |             |
|    total_timesteps      | 57944       |
| train/                  |             |
|    approx_kl            | 0.004816227 |
|    clip_fraction        | 0.0384      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.582      |
|    explained_variance   | 0.773       |
|    learning_rate        | 0.0003      |
|    loss                 | 60.4        |
|    n_updates            | 1570        |
|    policy_gradient_loss | -0.00193    |
|    value_loss           | 180         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 344      |
|    mean_reward     | 194      |
| time/              |          |
|    total_timesteps | 58944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 358      |
|    ep_rew_mean     | 229      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 29       |
|    time_elapsed    | 281      |
|    total_timesteps | 59392    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 304         |
|    mean_reward          | 201         |
| time/                   |             |
|    total_timesteps      | 59944       |
| train/                  |             |
|    approx_kl            | 0.009671049 |
|    clip_fraction        | 0.0608      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.658      |
|    explained_variance   | 0.741       |
|    learning_rate        | 0.0003      |
|    loss                 | 103         |
|    n_updates            | 1580        |
|    policy_gradient_loss | -0.00477    |
|    value_loss           | 200         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 317      |
|    mean_reward     | 261      |
| time/              |          |
|    total_timesteps | 60944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 348      |
|    ep_rew_mean     | 230      |
| time/              |          |
|    fps             | 210      |
|    iterations      | 30       |
|    time_elapsed    | 291      |
|    total_timesteps | 61440    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 275          |
|    mean_reward          | 221          |
| time/                   |              |
|    total_timesteps      | 61944        |
| train/                  |              |
|    approx_kl            | 0.0073967353 |
|    clip_fraction        | 0.0686       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.571       |
|    explained_variance   | 0.855        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.6         |
|    n_updates            | 1590         |
|    policy_gradient_loss | -0.00393     |
|    value_loss           | 76.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 224      |
|    mean_reward     | 153      |
| time/              |          |
|    total_timesteps | 62944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 347      |
|    ep_rew_mean     | 224      |
| time/              |          |
|    fps             | 211      |
|    iterations      | 31       |
|    time_elapsed    | 300      |
|    total_timesteps | 63488    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 266         |
|    mean_reward          | 200         |
| time/                   |             |
|    total_timesteps      | 63944       |
| train/                  |             |
|    approx_kl            | 0.004412178 |
|    clip_fraction        | 0.0429      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.656      |
|    explained_variance   | 0.817       |
|    learning_rate        | 0.0003      |
|    loss                 | 54.2        |
|    n_updates            | 1600        |
|    policy_gradient_loss | -0.00197    |
|    value_loss           | 182         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 289      |
|    mean_reward     | 255      |
| time/              |          |
|    total_timesteps | 64944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 345      |
|    ep_rew_mean     | 221      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 32       |
|    time_elapsed    | 308      |
|    total_timesteps | 65536    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 325         |
|    mean_reward          | 250         |
| time/                   |             |
|    total_timesteps      | 65944       |
| train/                  |             |
|    approx_kl            | 0.007307171 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.662      |
|    explained_variance   | 0.868       |
|    learning_rate        | 0.0003      |
|    loss                 | 116         |
|    n_updates            | 1610        |
|    policy_gradient_loss | -0.00715    |
|    value_loss           | 171         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 248      |
|    mean_reward     | 205      |
| time/              |          |
|    total_timesteps | 66944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 342      |
|    ep_rew_mean     | 225      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 33       |
|    time_elapsed    | 318      |
|    total_timesteps | 67584    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 484          |
|    mean_reward          | 237          |
| time/                   |              |
|    total_timesteps      | 67944        |
| train/                  |              |
|    approx_kl            | 0.0112878755 |
|    clip_fraction        | 0.08         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.615       |
|    explained_variance   | 0.75         |
|    learning_rate        | 0.0003       |
|    loss                 | 194          |
|    n_updates            | 1620         |
|    policy_gradient_loss | -0.00184     |
|    value_loss           | 143          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 395      |
|    mean_reward     | 169      |
| time/              |          |
|    total_timesteps | 68944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 334      |
|    ep_rew_mean     | 219      |
| time/              |          |
|    fps             | 211      |
|    iterations      | 34       |
|    time_elapsed    | 329      |
|    total_timesteps | 69632    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 251         |
|    mean_reward          | 208         |
| time/                   |             |
|    total_timesteps      | 69944       |
| train/                  |             |
|    approx_kl            | 0.007638059 |
|    clip_fraction        | 0.0771      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.592      |
|    explained_variance   | 0.82        |
|    learning_rate        | 0.0003      |
|    loss                 | 91.2        |
|    n_updates            | 1630        |
|    policy_gradient_loss | -0.00443    |
|    value_loss           | 111         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 378      |
|    mean_reward     | 164      |
| time/              |          |
|    total_timesteps | 70944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 332      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 211      |
|    iterations      | 35       |
|    time_elapsed    | 338      |
|    total_timesteps | 71680    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 262          |
|    mean_reward          | 252          |
| time/                   |              |
|    total_timesteps      | 71944        |
| train/                  |              |
|    approx_kl            | 0.0080360975 |
|    clip_fraction        | 0.095        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.63        |
|    explained_variance   | 0.827        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.9         |
|    n_updates            | 1640         |
|    policy_gradient_loss | -0.00405     |
|    value_loss           | 74.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 256      |
|    mean_reward     | 213      |
| time/              |          |
|    total_timesteps | 72944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 335      |
|    ep_rew_mean     | 213      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 36       |
|    time_elapsed    | 346      |
|    total_timesteps | 73728    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 330          |
|    mean_reward          | 196          |
| time/                   |              |
|    total_timesteps      | 73944        |
| train/                  |              |
|    approx_kl            | 0.0052702315 |
|    clip_fraction        | 0.0579       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.657       |
|    explained_variance   | 0.822        |
|    learning_rate        | 0.0003       |
|    loss                 | 122          |
|    n_updates            | 1650         |
|    policy_gradient_loss | -0.00431     |
|    value_loss           | 151          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 257      |
|    mean_reward     | 138      |
| time/              |          |
|    total_timesteps | 74944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 336      |
|    ep_rew_mean     | 215      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 37       |
|    time_elapsed    | 356      |
|    total_timesteps | 75776    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 238          |
|    mean_reward          | 194          |
| time/                   |              |
|    total_timesteps      | 75944        |
| train/                  |              |
|    approx_kl            | 0.0069011403 |
|    clip_fraction        | 0.0606       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.68        |
|    explained_variance   | 0.693        |
|    learning_rate        | 0.0003       |
|    loss                 | 69.8         |
|    n_updates            | 1660         |
|    policy_gradient_loss | -0.00175     |
|    value_loss           | 238          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 231      |
|    mean_reward     | 200      |
| time/              |          |
|    total_timesteps | 76944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 343      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 38       |
|    time_elapsed    | 364      |
|    total_timesteps | 77824    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 248         |
|    mean_reward          | 253         |
| time/                   |             |
|    total_timesteps      | 77944       |
| train/                  |             |
|    approx_kl            | 0.018156942 |
|    clip_fraction        | 0.0683      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.766      |
|    explained_variance   | 0.701       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.81        |
|    n_updates            | 1670        |
|    policy_gradient_loss | -0.0012     |
|    value_loss           | 43.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 350      |
|    mean_reward     | 250      |
| time/              |          |
|    total_timesteps | 78944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 336      |
|    ep_rew_mean     | 205      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 39       |
|    time_elapsed    | 374      |
|    total_timesteps | 79872    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 255          |
|    mean_reward          | 206          |
| time/                   |              |
|    total_timesteps      | 79944        |
| train/                  |              |
|    approx_kl            | 0.0037721314 |
|    clip_fraction        | 0.0322       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.675       |
|    explained_variance   | 0.745        |
|    learning_rate        | 0.0003       |
|    loss                 | 60.8         |
|    n_updates            | 1680         |
|    policy_gradient_loss | -0.00421     |
|    value_loss           | 207          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 274      |
|    mean_reward     | 185      |
| time/              |          |
|    total_timesteps | 80944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 330      |
|    ep_rew_mean     | 207      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 40       |
|    time_elapsed    | 383      |
|    total_timesteps | 81920    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 415         |
|    mean_reward          | 239         |
| time/                   |             |
|    total_timesteps      | 81944       |
| train/                  |             |
|    approx_kl            | 0.011303165 |
|    clip_fraction        | 0.0634      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.623      |
|    explained_variance   | 0.746       |
|    learning_rate        | 0.0003      |
|    loss                 | 51.6        |
|    n_updates            | 1690        |
|    policy_gradient_loss | -0.00294    |
|    value_loss           | 170         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 250      |
|    mean_reward     | 219      |
| time/              |          |
|    total_timesteps | 82944    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 293      |
|    mean_reward     | 263      |
| time/              |          |
|    total_timesteps | 83944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 325      |
|    ep_rew_mean     | 207      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 41       |
|    time_elapsed    | 394      |
|    total_timesteps | 83968    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 227          |
|    mean_reward          | 114          |
| time/                   |              |
|    total_timesteps      | 84944        |
| train/                  |              |
|    approx_kl            | 0.0056921206 |
|    clip_fraction        | 0.0403       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.629       |
|    explained_variance   | 0.623        |
|    learning_rate        | 0.0003       |
|    loss                 | 107          |
|    n_updates            | 1700         |
|    policy_gradient_loss | -0.00198     |
|    value_loss           | 218          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 314      |
|    mean_reward     | 184      |
| time/              |          |
|    total_timesteps | 85944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 323      |
|    ep_rew_mean     | 207      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 42       |
|    time_elapsed    | 403      |
|    total_timesteps | 86016    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 431         |
|    mean_reward          | 226         |
| time/                   |             |
|    total_timesteps      | 86944       |
| train/                  |             |
|    approx_kl            | 0.004550028 |
|    clip_fraction        | 0.0542      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.647      |
|    explained_variance   | 0.813       |
|    learning_rate        | 0.0003      |
|    loss                 | 32.9        |
|    n_updates            | 1710        |
|    policy_gradient_loss | -0.00152    |
|    value_loss           | 120         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 387      |
|    mean_reward     | 113      |
| time/              |          |
|    total_timesteps | 87944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 325      |
|    ep_rew_mean     | 209      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 43       |
|    time_elapsed    | 414      |
|    total_timesteps | 88064    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 397         |
|    mean_reward          | 229         |
| time/                   |             |
|    total_timesteps      | 88944       |
| train/                  |             |
|    approx_kl            | 0.005606184 |
|    clip_fraction        | 0.0486      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.736      |
|    explained_variance   | 0.856       |
|    learning_rate        | 0.0003      |
|    loss                 | 17.2        |
|    n_updates            | 1720        |
|    policy_gradient_loss | -0.00145    |
|    value_loss           | 74.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 393      |
|    mean_reward     | 220      |
| time/              |          |
|    total_timesteps | 89944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 336      |
|    ep_rew_mean     | 215      |
| time/              |          |
|    fps             | 211      |
|    iterations      | 44       |
|    time_elapsed    | 425      |
|    total_timesteps | 90112    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 253          |
|    mean_reward          | 250          |
| time/                   |              |
|    total_timesteps      | 90944        |
| train/                  |              |
|    approx_kl            | 0.0059570633 |
|    clip_fraction        | 0.0527       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.467       |
|    explained_variance   | 0.904        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.4         |
|    n_updates            | 1730         |
|    policy_gradient_loss | -0.00224     |
|    value_loss           | 73.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 308      |
|    mean_reward     | 247      |
| time/              |          |
|    total_timesteps | 91944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 340      |
|    ep_rew_mean     | 217      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 45       |
|    time_elapsed    | 434      |
|    total_timesteps | 92160    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 255          |
|    mean_reward          | 251          |
| time/                   |              |
|    total_timesteps      | 92944        |
| train/                  |              |
|    approx_kl            | 0.0051547317 |
|    clip_fraction        | 0.0692       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.657       |
|    explained_variance   | 0.837        |
|    learning_rate        | 0.0003       |
|    loss                 | 40.3         |
|    n_updates            | 1740         |
|    policy_gradient_loss | -0.00365     |
|    value_loss           | 88.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 239      |
|    mean_reward     | 217      |
| time/              |          |
|    total_timesteps | 93944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 332      |
|    ep_rew_mean     | 213      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 46       |
|    time_elapsed    | 443      |
|    total_timesteps | 94208    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 233          |
|    mean_reward          | 201          |
| time/                   |              |
|    total_timesteps      | 94944        |
| train/                  |              |
|    approx_kl            | 0.0044035637 |
|    clip_fraction        | 0.0286       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.604       |
|    explained_variance   | 0.716        |
|    learning_rate        | 0.0003       |
|    loss                 | 253          |
|    n_updates            | 1750         |
|    policy_gradient_loss | -0.00526     |
|    value_loss           | 222          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 333      |
|    mean_reward     | 135      |
| time/              |          |
|    total_timesteps | 95944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 340      |
|    ep_rew_mean     | 218      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 47       |
|    time_elapsed    | 452      |
|    total_timesteps | 96256    |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 423          |
|    mean_reward          | 170          |
| time/                   |              |
|    total_timesteps      | 96944        |
| train/                  |              |
|    approx_kl            | 0.0050335373 |
|    clip_fraction        | 0.0657       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.636       |
|    explained_variance   | 0.838        |
|    learning_rate        | 0.0003       |
|    loss                 | 14           |
|    n_updates            | 1760         |
|    policy_gradient_loss | -0.00433     |
|    value_loss           | 92.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 223      |
|    mean_reward     | 142      |
| time/              |          |
|    total_timesteps | 97944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 354      |
|    ep_rew_mean     | 215      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 48       |
|    time_elapsed    | 462      |
|    total_timesteps | 98304    |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 261         |
|    mean_reward          | 252         |
| time/                   |             |
|    total_timesteps      | 98944       |
| train/                  |             |
|    approx_kl            | 0.009273395 |
|    clip_fraction        | 0.112       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.602      |
|    explained_variance   | 0.859       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.44        |
|    n_updates            | 1770        |
|    policy_gradient_loss | -0.00536    |
|    value_loss           | 35.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 270      |
|    mean_reward     | 255      |
| time/              |          |
|    total_timesteps | 99944    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 354      |
|    ep_rew_mean     | 218      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 49       |
|    time_elapsed    | 471      |
|    total_timesteps | 100352   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 250         |
|    mean_reward          | 257         |
| time/                   |             |
|    total_timesteps      | 100944      |
| train/                  |             |
|    approx_kl            | 0.027112097 |
|    clip_fraction        | 0.0825      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.601      |
|    explained_variance   | 0.831       |
|    learning_rate        | 0.0003      |
|    loss                 | 15          |
|    n_updates            | 1780        |
|    policy_gradient_loss | -0.00917    |
|    value_loss           | 142         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 232      |
|    mean_reward     | 159      |
| time/              |          |
|    total_timesteps | 101944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 347      |
|    ep_rew_mean     | 213      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 50       |
|    time_elapsed    | 480      |
|    total_timesteps | 102400   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 260          |
|    mean_reward          | 192          |
| time/                   |              |
|    total_timesteps      | 102944       |
| train/                  |              |
|    approx_kl            | 0.0038045668 |
|    clip_fraction        | 0.0262       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.56        |
|    explained_variance   | 0.739        |
|    learning_rate        | 0.0003       |
|    loss                 | 91.8         |
|    n_updates            | 1790         |
|    policy_gradient_loss | -0.00339     |
|    value_loss           | 215          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 398      |
|    mean_reward     | 182      |
| time/              |          |
|    total_timesteps | 103944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 347      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 51       |
|    time_elapsed    | 490      |
|    total_timesteps | 104448   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 233         |
|    mean_reward          | 217         |
| time/                   |             |
|    total_timesteps      | 104944      |
| train/                  |             |
|    approx_kl            | 0.006598042 |
|    clip_fraction        | 0.0677      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.64       |
|    explained_variance   | 0.908       |
|    learning_rate        | 0.0003      |
|    loss                 | 71.2        |
|    n_updates            | 1800        |
|    policy_gradient_loss | -0.00397    |
|    value_loss           | 141         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 261      |
|    mean_reward     | 138      |
| time/              |          |
|    total_timesteps | 105944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 350      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 52       |
|    time_elapsed    | 499      |
|    total_timesteps | 106496   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 292          |
|    mean_reward          | 257          |
| time/                   |              |
|    total_timesteps      | 106944       |
| train/                  |              |
|    approx_kl            | 0.0057428777 |
|    clip_fraction        | 0.0693       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.599       |
|    explained_variance   | 0.836        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.4         |
|    n_updates            | 1810         |
|    policy_gradient_loss | -0.00361     |
|    value_loss           | 161          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 274      |
|    mean_reward     | 267      |
| time/              |          |
|    total_timesteps | 107944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 346      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 53       |
|    time_elapsed    | 508      |
|    total_timesteps | 108544   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 291         |
|    mean_reward          | 240         |
| time/                   |             |
|    total_timesteps      | 108944      |
| train/                  |             |
|    approx_kl            | 0.007536448 |
|    clip_fraction        | 0.0567      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.658      |
|    explained_variance   | 0.827       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.9        |
|    n_updates            | 1820        |
|    policy_gradient_loss | -0.0028     |
|    value_loss           | 127         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 231      |
|    mean_reward     | 209      |
| time/              |          |
|    total_timesteps | 109944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 349      |
|    ep_rew_mean     | 211      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 54       |
|    time_elapsed    | 517      |
|    total_timesteps | 110592   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 251         |
|    mean_reward          | 143         |
| time/                   |             |
|    total_timesteps      | 110944      |
| train/                  |             |
|    approx_kl            | 0.008542303 |
|    clip_fraction        | 0.0548      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.683      |
|    explained_variance   | 0.839       |
|    learning_rate        | 0.0003      |
|    loss                 | 56.3        |
|    n_updates            | 1830        |
|    policy_gradient_loss | -0.00495    |
|    value_loss           | 130         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 274      |
|    mean_reward     | 270      |
| time/              |          |
|    total_timesteps | 111944   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 348      |
|    ep_rew_mean     | 214      |
| time/              |          |
|    fps             | 214      |
|    iterations      | 55       |
|    time_elapsed    | 526      |
|    total_timesteps | 112640   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 398          |
|    mean_reward          | 174          |
| time/                   |              |
|    total_timesteps      | 112944       |
| train/                  |              |
|    approx_kl            | 0.0041167014 |
|    clip_fraction        | 0.0266       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.627       |
|    explained_variance   | 0.781        |
|    learning_rate        | 0.0003       |
|    loss                 | 18.7         |
|    n_updates            | 1840         |
|    policy_gradient_loss | -0.00407     |
|    value_loss           | 54.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 405      |
|    mean_reward     | 234      |
| time/              |          |
|    total_timesteps | 113944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 345      |
|    ep_rew_mean     | 219      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 56       |
|    time_elapsed    | 536      |
|    total_timesteps | 114688   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 214         |
|    mean_reward          | 165         |
| time/                   |             |
|    total_timesteps      | 114944      |
| train/                  |             |
|    approx_kl            | 0.007298142 |
|    clip_fraction        | 0.0366      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.621      |
|    explained_variance   | 0.803       |
|    learning_rate        | 0.0003      |
|    loss                 | 23.4        |
|    n_updates            | 1850        |
|    policy_gradient_loss | -0.00267    |
|    value_loss           | 95.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 253      |
|    mean_reward     | 265      |
| time/              |          |
|    total_timesteps | 115944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 335      |
|    ep_rew_mean     | 213      |
| time/              |          |
|    fps             | 214      |
|    iterations      | 57       |
|    time_elapsed    | 544      |
|    total_timesteps | 116736   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 389          |
|    mean_reward          | 226          |
| time/                   |              |
|    total_timesteps      | 116944       |
| train/                  |              |
|    approx_kl            | 0.0062441276 |
|    clip_fraction        | 0.0768       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.674       |
|    explained_variance   | 0.765        |
|    learning_rate        | 0.0003       |
|    loss                 | 133          |
|    n_updates            | 1860         |
|    policy_gradient_loss | -0.00921     |
|    value_loss           | 196          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 255      |
|    mean_reward     | 263      |
| time/              |          |
|    total_timesteps | 117944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 342      |
|    ep_rew_mean     | 222      |
| time/              |          |
|    fps             | 214      |
|    iterations      | 58       |
|    time_elapsed    | 554      |
|    total_timesteps | 118784   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 260         |
|    mean_reward          | 242         |
| time/                   |             |
|    total_timesteps      | 118944      |
| train/                  |             |
|    approx_kl            | 0.005695194 |
|    clip_fraction        | 0.0646      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.593      |
|    explained_variance   | 0.854       |
|    learning_rate        | 0.0003      |
|    loss                 | 58.8        |
|    n_updates            | 1870        |
|    policy_gradient_loss | -0.00515    |
|    value_loss           | 67.8        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 287      |
|    mean_reward     | 259      |
| time/              |          |
|    total_timesteps | 119944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 344      |
|    ep_rew_mean     | 227      |
| time/              |          |
|    fps             | 214      |
|    iterations      | 59       |
|    time_elapsed    | 563      |
|    total_timesteps | 120832   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 251          |
|    mean_reward          | 218          |
| time/                   |              |
|    total_timesteps      | 120944       |
| train/                  |              |
|    approx_kl            | 0.0029971688 |
|    clip_fraction        | 0.0436       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.611       |
|    explained_variance   | 0.898        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.08         |
|    n_updates            | 1880         |
|    policy_gradient_loss | -0.00219     |
|    value_loss           | 53.6         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 231      |
|    mean_reward     | 199      |
| time/              |          |
|    total_timesteps | 121944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 331      |
|    ep_rew_mean     | 224      |
| time/              |          |
|    fps             | 214      |
|    iterations      | 60       |
|    time_elapsed    | 571      |
|    total_timesteps | 122880   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 290          |
|    mean_reward          | 192          |
| time/                   |              |
|    total_timesteps      | 122944       |
| train/                  |              |
|    approx_kl            | 0.0062095467 |
|    clip_fraction        | 0.0574       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.568       |
|    explained_variance   | 0.886        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.61         |
|    n_updates            | 1890         |
|    policy_gradient_loss | -0.00393     |
|    value_loss           | 83.2         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 241      |
|    mean_reward     | 204      |
| time/              |          |
|    total_timesteps | 123944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 321      |
|    ep_rew_mean     | 220      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 61       |
|    time_elapsed    | 580      |
|    total_timesteps | 124928   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 266          |
|    mean_reward          | 262          |
| time/                   |              |
|    total_timesteps      | 124944       |
| train/                  |              |
|    approx_kl            | 0.0041602496 |
|    clip_fraction        | 0.049        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.719       |
|    explained_variance   | 0.809        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.9         |
|    n_updates            | 1900         |
|    policy_gradient_loss | -0.00332     |
|    value_loss           | 105          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 240      |
|    mean_reward     | 269      |
| time/              |          |
|    total_timesteps | 125944   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 253      |
|    mean_reward     | 199      |
| time/              |          |
|    total_timesteps | 126944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 329      |
|    ep_rew_mean     | 222      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 62       |
|    time_elapsed    | 590      |
|    total_timesteps | 126976   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 247          |
|    mean_reward          | 266          |
| time/                   |              |
|    total_timesteps      | 127944       |
| train/                  |              |
|    approx_kl            | 0.0077404226 |
|    clip_fraction        | 0.087        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.665       |
|    explained_variance   | 0.948        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.77         |
|    n_updates            | 1910         |
|    policy_gradient_loss | -0.00297     |
|    value_loss           | 59.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 250      |
|    mean_reward     | 218      |
| time/              |          |
|    total_timesteps | 128944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 332      |
|    ep_rew_mean     | 232      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 63       |
|    time_elapsed    | 598      |
|    total_timesteps | 129024   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 300         |
|    mean_reward          | 254         |
| time/                   |             |
|    total_timesteps      | 129944      |
| train/                  |             |
|    approx_kl            | 0.007597693 |
|    clip_fraction        | 0.0949      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.662      |
|    explained_variance   | 0.876       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.42        |
|    n_updates            | 1920        |
|    policy_gradient_loss | -0.00534    |
|    value_loss           | 29.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 263      |
|    mean_reward     | 204      |
| time/              |          |
|    total_timesteps | 130944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 316      |
|    ep_rew_mean     | 244      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 64       |
|    time_elapsed    | 607      |
|    total_timesteps | 131072   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 422         |
|    mean_reward          | 247         |
| time/                   |             |
|    total_timesteps      | 131944      |
| train/                  |             |
|    approx_kl            | 0.008197753 |
|    clip_fraction        | 0.0621      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.682      |
|    explained_variance   | 0.822       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.18        |
|    n_updates            | 1930        |
|    policy_gradient_loss | -0.00272    |
|    value_loss           | 38.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 305      |
|    mean_reward     | 264      |
| time/              |          |
|    total_timesteps | 132944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 307      |
|    ep_rew_mean     | 246      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 65       |
|    time_elapsed    | 617      |
|    total_timesteps | 133120   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 394          |
|    mean_reward          | 232          |
| time/                   |              |
|    total_timesteps      | 133944       |
| train/                  |              |
|    approx_kl            | 0.0039796997 |
|    clip_fraction        | 0.0292       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.7         |
|    explained_variance   | 0.629        |
|    learning_rate        | 0.0003       |
|    loss                 | 27.5         |
|    n_updates            | 1940         |
|    policy_gradient_loss | -0.00245     |
|    value_loss           | 237          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 270      |
|    mean_reward     | 248      |
| time/              |          |
|    total_timesteps | 134944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 302      |
|    ep_rew_mean     | 248      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 66       |
|    time_elapsed    | 626      |
|    total_timesteps | 135168   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 252          |
|    mean_reward          | 256          |
| time/                   |              |
|    total_timesteps      | 135944       |
| train/                  |              |
|    approx_kl            | 0.0030964073 |
|    clip_fraction        | 0.0228       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.597       |
|    explained_variance   | 0.814        |
|    learning_rate        | 0.0003       |
|    loss                 | 41.1         |
|    n_updates            | 1950         |
|    policy_gradient_loss | -0.00289     |
|    value_loss           | 86.6         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 210      |
|    mean_reward     | 115      |
| time/              |          |
|    total_timesteps | 136944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 307      |
|    ep_rew_mean     | 252      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 67       |
|    time_elapsed    | 635      |
|    total_timesteps | 137216   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 257         |
|    mean_reward          | 261         |
| time/                   |             |
|    total_timesteps      | 137944      |
| train/                  |             |
|    approx_kl            | 0.007948662 |
|    clip_fraction        | 0.0844      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.666      |
|    explained_variance   | 0.804       |
|    learning_rate        | 0.0003      |
|    loss                 | 18.3        |
|    n_updates            | 1960        |
|    policy_gradient_loss | -0.00297    |
|    value_loss           | 67.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 405      |
|    mean_reward     | 217      |
| time/              |          |
|    total_timesteps | 138944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 310      |
|    ep_rew_mean     | 255      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 68       |
|    time_elapsed    | 644      |
|    total_timesteps | 139264   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 391          |
|    mean_reward          | 233          |
| time/                   |              |
|    total_timesteps      | 139944       |
| train/                  |              |
|    approx_kl            | 0.0046203164 |
|    clip_fraction        | 0.055        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.583       |
|    explained_variance   | 0.734        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.6         |
|    n_updates            | 1970         |
|    policy_gradient_loss | -0.00168     |
|    value_loss           | 108          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 260      |
|    mean_reward     | 252      |
| time/              |          |
|    total_timesteps | 140944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 310      |
|    ep_rew_mean     | 259      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 69       |
|    time_elapsed    | 653      |
|    total_timesteps | 141312   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 258          |
|    mean_reward          | 212          |
| time/                   |              |
|    total_timesteps      | 141944       |
| train/                  |              |
|    approx_kl            | 0.0038524186 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.672       |
|    explained_variance   | 0.794        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.9         |
|    n_updates            | 1980         |
|    policy_gradient_loss | -0.00107     |
|    value_loss           | 106          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 318      |
|    mean_reward     | 217      |
| time/              |          |
|    total_timesteps | 142944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 300      |
|    ep_rew_mean     | 256      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 70       |
|    time_elapsed    | 662      |
|    total_timesteps | 143360   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 249         |
|    mean_reward          | 254         |
| time/                   |             |
|    total_timesteps      | 143944      |
| train/                  |             |
|    approx_kl            | 0.007010345 |
|    clip_fraction        | 0.0471      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.657      |
|    explained_variance   | 0.863       |
|    learning_rate        | 0.0003      |
|    loss                 | 40.5        |
|    n_updates            | 1990        |
|    policy_gradient_loss | -0.00264    |
|    value_loss           | 96.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 362      |
|    mean_reward     | 207      |
| time/              |          |
|    total_timesteps | 144944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 297      |
|    ep_rew_mean     | 254      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 71       |
|    time_elapsed    | 671      |
|    total_timesteps | 145408   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 240         |
|    mean_reward          | 268         |
| time/                   |             |
|    total_timesteps      | 145944      |
| train/                  |             |
|    approx_kl            | 0.005245098 |
|    clip_fraction        | 0.0658      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.679      |
|    explained_variance   | 0.815       |
|    learning_rate        | 0.0003      |
|    loss                 | 26.6        |
|    n_updates            | 2000        |
|    policy_gradient_loss | -0.00475    |
|    value_loss           | 139         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 561      |
|    mean_reward     | 216      |
| time/              |          |
|    total_timesteps | 146944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 304      |
|    ep_rew_mean     | 257      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 72       |
|    time_elapsed    | 682      |
|    total_timesteps | 147456   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 264         |
|    mean_reward          | 261         |
| time/                   |             |
|    total_timesteps      | 147944      |
| train/                  |             |
|    approx_kl            | 0.003007837 |
|    clip_fraction        | 0.0255      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.587      |
|    explained_variance   | 0.83        |
|    learning_rate        | 0.0003      |
|    loss                 | 45.6        |
|    n_updates            | 2010        |
|    policy_gradient_loss | -0.00102    |
|    value_loss           | 121         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 285      |
|    mean_reward     | 257      |
| time/              |          |
|    total_timesteps | 148944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 310      |
|    ep_rew_mean     | 253      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 73       |
|    time_elapsed    | 691      |
|    total_timesteps | 149504   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 358          |
|    mean_reward          | 249          |
| time/                   |              |
|    total_timesteps      | 149944       |
| train/                  |              |
|    approx_kl            | 0.0062040575 |
|    clip_fraction        | 0.063        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.691       |
|    explained_variance   | 0.875        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.89         |
|    n_updates            | 2020         |
|    policy_gradient_loss | 0.00069      |
|    value_loss           | 26.3         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 388      |
|    mean_reward     | 188      |
| time/              |          |
|    total_timesteps | 150944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 320      |
|    ep_rew_mean     | 250      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 74       |
|    time_elapsed    | 701      |
|    total_timesteps | 151552   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 293         |
|    mean_reward          | 245         |
| time/                   |             |
|    total_timesteps      | 151944      |
| train/                  |             |
|    approx_kl            | 0.006405322 |
|    clip_fraction        | 0.0471      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.671      |
|    explained_variance   | 0.807       |
|    learning_rate        | 0.0003      |
|    loss                 | 21.4        |
|    n_updates            | 2030        |
|    policy_gradient_loss | -0.00248    |
|    value_loss           | 49.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 226      |
|    mean_reward     | 174      |
| time/              |          |
|    total_timesteps | 152944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 328      |
|    ep_rew_mean     | 251      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 75       |
|    time_elapsed    | 710      |
|    total_timesteps | 153600   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 232          |
|    mean_reward          | 271          |
| time/                   |              |
|    total_timesteps      | 153944       |
| train/                  |              |
|    approx_kl            | 0.0075792368 |
|    clip_fraction        | 0.103        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.67        |
|    explained_variance   | 0.809        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.9         |
|    n_updates            | 2040         |
|    policy_gradient_loss | -0.00529     |
|    value_loss           | 121          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 422      |
|    mean_reward     | 230      |
| time/              |          |
|    total_timesteps | 154944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 340      |
|    ep_rew_mean     | 250      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 76       |
|    time_elapsed    | 720      |
|    total_timesteps | 155648   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 299         |
|    mean_reward          | 250         |
| time/                   |             |
|    total_timesteps      | 155944      |
| train/                  |             |
|    approx_kl            | 0.014434223 |
|    clip_fraction        | 0.174       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.71       |
|    explained_variance   | 0.956       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.5         |
|    n_updates            | 2050        |
|    policy_gradient_loss | -0.00318    |
|    value_loss           | 22.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 438      |
|    mean_reward     | 228      |
| time/              |          |
|    total_timesteps | 156944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 342      |
|    ep_rew_mean     | 249      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 77       |
|    time_elapsed    | 730      |
|    total_timesteps | 157696   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 299          |
|    mean_reward          | 264          |
| time/                   |              |
|    total_timesteps      | 157944       |
| train/                  |              |
|    approx_kl            | 0.0038397566 |
|    clip_fraction        | 0.057        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.575       |
|    explained_variance   | 0.827        |
|    learning_rate        | 0.0003       |
|    loss                 | 75.3         |
|    n_updates            | 2060         |
|    policy_gradient_loss | -0.00166     |
|    value_loss           | 161          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 238      |
|    mean_reward     | 226      |
| time/              |          |
|    total_timesteps | 158944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 344      |
|    ep_rew_mean     | 250      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 78       |
|    time_elapsed    | 739      |
|    total_timesteps | 159744   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 292          |
|    mean_reward          | 262          |
| time/                   |              |
|    total_timesteps      | 159944       |
| train/                  |              |
|    approx_kl            | 0.0035789157 |
|    clip_fraction        | 0.0159       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.568       |
|    explained_variance   | 0.817        |
|    learning_rate        | 0.0003       |
|    loss                 | 57.4         |
|    n_updates            | 2070         |
|    policy_gradient_loss | -0.004       |
|    value_loss           | 147          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 385      |
|    mean_reward     | 251      |
| time/              |          |
|    total_timesteps | 160944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 334      |
|    ep_rew_mean     | 243      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 79       |
|    time_elapsed    | 749      |
|    total_timesteps | 161792   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 453         |
|    mean_reward          | 228         |
| time/                   |             |
|    total_timesteps      | 161944      |
| train/                  |             |
|    approx_kl            | 0.003295904 |
|    clip_fraction        | 0.0399      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.667      |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0003      |
|    loss                 | 53.4        |
|    n_updates            | 2080        |
|    policy_gradient_loss | -0.00269    |
|    value_loss           | 68          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 332      |
|    mean_reward     | 263      |
| time/              |          |
|    total_timesteps | 162944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 335      |
|    ep_rew_mean     | 236      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 80       |
|    time_elapsed    | 759      |
|    total_timesteps | 163840   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 261          |
|    mean_reward          | 229          |
| time/                   |              |
|    total_timesteps      | 163944       |
| train/                  |              |
|    approx_kl            | 0.0038857376 |
|    clip_fraction        | 0.0318       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.641       |
|    explained_variance   | 0.891        |
|    learning_rate        | 0.0003       |
|    loss                 | 52.8         |
|    n_updates            | 2090         |
|    policy_gradient_loss | -0.00325     |
|    value_loss           | 99.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 217      |
|    mean_reward     | 182      |
| time/              |          |
|    total_timesteps | 164944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 336      |
|    ep_rew_mean     | 238      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 81       |
|    time_elapsed    | 768      |
|    total_timesteps | 165888   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 267         |
|    mean_reward          | 255         |
| time/                   |             |
|    total_timesteps      | 165944      |
| train/                  |             |
|    approx_kl            | 0.016936824 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.701      |
|    explained_variance   | 0.909       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.15        |
|    n_updates            | 2100        |
|    policy_gradient_loss | -0.0179     |
|    value_loss           | 17.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 278      |
|    mean_reward     | 203      |
| time/              |          |
|    total_timesteps | 166944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 349      |
|    ep_rew_mean     | 237      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 82       |
|    time_elapsed    | 777      |
|    total_timesteps | 167936   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 229          |
|    mean_reward          | 200          |
| time/                   |              |
|    total_timesteps      | 167944       |
| train/                  |              |
|    approx_kl            | 0.0026822994 |
|    clip_fraction        | 0.0271       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.648       |
|    explained_variance   | 0.669        |
|    learning_rate        | 0.0003       |
|    loss                 | 162          |
|    n_updates            | 2110         |
|    policy_gradient_loss | -0.000913    |
|    value_loss           | 261          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 238      |
|    mean_reward     | 154      |
| time/              |          |
|    total_timesteps | 168944   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 253      |
|    mean_reward     | 182      |
| time/              |          |
|    total_timesteps | 169944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 354      |
|    ep_rew_mean     | 232      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 83       |
|    time_elapsed    | 786      |
|    total_timesteps | 169984   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 558          |
|    mean_reward          | 212          |
| time/                   |              |
|    total_timesteps      | 170944       |
| train/                  |              |
|    approx_kl            | 0.0028901994 |
|    clip_fraction        | 0.05         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.62        |
|    explained_variance   | 0.903        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.6         |
|    n_updates            | 2120         |
|    policy_gradient_loss | -0.00169     |
|    value_loss           | 82.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 266      |
|    mean_reward     | 269      |
| time/              |          |
|    total_timesteps | 171944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 352      |
|    ep_rew_mean     | 235      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 84       |
|    time_elapsed    | 797      |
|    total_timesteps | 172032   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 240         |
|    mean_reward          | 170         |
| time/                   |             |
|    total_timesteps      | 172944      |
| train/                  |             |
|    approx_kl            | 0.005540833 |
|    clip_fraction        | 0.0663      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.678      |
|    explained_variance   | 0.8         |
|    learning_rate        | 0.0003      |
|    loss                 | 32.5        |
|    n_updates            | 2130        |
|    policy_gradient_loss | -0.00505    |
|    value_loss           | 155         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 259      |
|    mean_reward     | 261      |
| time/              |          |
|    total_timesteps | 173944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 363      |
|    ep_rew_mean     | 234      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 85       |
|    time_elapsed    | 806      |
|    total_timesteps | 174080   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 424          |
|    mean_reward          | 239          |
| time/                   |              |
|    total_timesteps      | 174944       |
| train/                  |              |
|    approx_kl            | 0.0050163874 |
|    clip_fraction        | 0.0853       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.715       |
|    explained_variance   | 0.959        |
|    learning_rate        | 0.0003       |
|    loss                 | 10.9         |
|    n_updates            | 2140         |
|    policy_gradient_loss | -0.006       |
|    value_loss           | 24.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 403      |
|    mean_reward     | 230      |
| time/              |          |
|    total_timesteps | 175944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 362      |
|    ep_rew_mean     | 237      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 86       |
|    time_elapsed    | 817      |
|    total_timesteps | 176128   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 228         |
|    mean_reward          | 257         |
| time/                   |             |
|    total_timesteps      | 176944      |
| train/                  |             |
|    approx_kl            | 0.006849155 |
|    clip_fraction        | 0.0699      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.631      |
|    explained_variance   | 0.805       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.6         |
|    n_updates            | 2150        |
|    policy_gradient_loss | -0.00283    |
|    value_loss           | 39.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 385      |
|    mean_reward     | 222      |
| time/              |          |
|    total_timesteps | 177944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 356      |
|    ep_rew_mean     | 238      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 87       |
|    time_elapsed    | 826      |
|    total_timesteps | 178176   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 236         |
|    mean_reward          | 268         |
| time/                   |             |
|    total_timesteps      | 178944      |
| train/                  |             |
|    approx_kl            | 0.005223415 |
|    clip_fraction        | 0.0577      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.574      |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.69        |
|    n_updates            | 2160        |
|    policy_gradient_loss | -0.00319    |
|    value_loss           | 27          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 242      |
|    mean_reward     | 209      |
| time/              |          |
|    total_timesteps | 179944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 364      |
|    ep_rew_mean     | 244      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 88       |
|    time_elapsed    | 835      |
|    total_timesteps | 180224   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 274         |
|    mean_reward          | 264         |
| time/                   |             |
|    total_timesteps      | 180944      |
| train/                  |             |
|    approx_kl            | 0.004175864 |
|    clip_fraction        | 0.0344      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.645      |
|    explained_variance   | 0.919       |
|    learning_rate        | 0.0003      |
|    loss                 | 12.8        |
|    n_updates            | 2170        |
|    policy_gradient_loss | -0.00481    |
|    value_loss           | 60.3        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 237      |
|    mean_reward     | 256      |
| time/              |          |
|    total_timesteps | 181944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 361      |
|    ep_rew_mean     | 246      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 89       |
|    time_elapsed    | 843      |
|    total_timesteps | 182272   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 431         |
|    mean_reward          | 239         |
| time/                   |             |
|    total_timesteps      | 182944      |
| train/                  |             |
|    approx_kl            | 0.015926221 |
|    clip_fraction        | 0.0787      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.648      |
|    explained_variance   | 0.93        |
|    learning_rate        | 0.0003      |
|    loss                 | 17          |
|    n_updates            | 2180        |
|    policy_gradient_loss | -0.0062     |
|    value_loss           | 39.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 416      |
|    mean_reward     | 242      |
| time/              |          |
|    total_timesteps | 183944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 343      |
|    ep_rew_mean     | 251      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 90       |
|    time_elapsed    | 854      |
|    total_timesteps | 184320   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 255         |
|    mean_reward          | 259         |
| time/                   |             |
|    total_timesteps      | 184944      |
| train/                  |             |
|    approx_kl            | 0.005793146 |
|    clip_fraction        | 0.0657      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.658      |
|    explained_variance   | 0.846       |
|    learning_rate        | 0.0003      |
|    loss                 | 17.3        |
|    n_updates            | 2190        |
|    policy_gradient_loss | -0.00409    |
|    value_loss           | 62.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 246      |
|    mean_reward     | 257      |
| time/              |          |
|    total_timesteps | 185944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 343      |
|    ep_rew_mean     | 254      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 91       |
|    time_elapsed    | 863      |
|    total_timesteps | 186368   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 239          |
|    mean_reward          | 248          |
| time/                   |              |
|    total_timesteps      | 186944       |
| train/                  |              |
|    approx_kl            | 0.0058042686 |
|    clip_fraction        | 0.0425       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.563       |
|    explained_variance   | 0.852        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.6         |
|    n_updates            | 2200         |
|    policy_gradient_loss | -0.00248     |
|    value_loss           | 51.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 264      |
|    mean_reward     | 264      |
| time/              |          |
|    total_timesteps | 187944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 323      |
|    ep_rew_mean     | 253      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 92       |
|    time_elapsed    | 871      |
|    total_timesteps | 188416   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 395         |
|    mean_reward          | 235         |
| time/                   |             |
|    total_timesteps      | 188944      |
| train/                  |             |
|    approx_kl            | 0.007199488 |
|    clip_fraction        | 0.0649      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.62       |
|    explained_variance   | 0.847       |
|    learning_rate        | 0.0003      |
|    loss                 | 13          |
|    n_updates            | 2210        |
|    policy_gradient_loss | -0.00536    |
|    value_loss           | 95.8        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 403      |
|    mean_reward     | 231      |
| time/              |          |
|    total_timesteps | 189944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 331      |
|    ep_rew_mean     | 257      |
| time/              |          |
|    fps             | 215      |
|    iterations      | 93       |
|    time_elapsed    | 882      |
|    total_timesteps | 190464   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 260          |
|    mean_reward          | 267          |
| time/                   |              |
|    total_timesteps      | 190944       |
| train/                  |              |
|    approx_kl            | 0.0072191246 |
|    clip_fraction        | 0.0672       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.65        |
|    explained_variance   | 0.937        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.7         |
|    n_updates            | 2220         |
|    policy_gradient_loss | -0.00196     |
|    value_loss           | 44.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 236      |
|    mean_reward     | 279      |
| time/              |          |
|    total_timesteps | 191944   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 332      |
|    ep_rew_mean     | 258      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 94       |
|    time_elapsed    | 891      |
|    total_timesteps | 192512   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 302          |
|    mean_reward          | 254          |
| time/                   |              |
|    total_timesteps      | 192944       |
| train/                  |              |
|    approx_kl            | 0.0046390565 |
|    clip_fraction        | 0.0572       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.609       |
|    explained_variance   | 0.92         |
|    learning_rate        | 0.0003       |
|    loss                 | 14.1         |
|    n_updates            | 2230         |
|    policy_gradient_loss | -0.00265     |
|    value_loss           | 57.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 220      |
|    mean_reward     | 201      |
| time/              |          |
|    total_timesteps | 193944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 325      |
|    ep_rew_mean     | 264      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 95       |
|    time_elapsed    | 899      |
|    total_timesteps | 194560   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 344         |
|    mean_reward          | 242         |
| time/                   |             |
|    total_timesteps      | 194944      |
| train/                  |             |
|    approx_kl            | 0.004359568 |
|    clip_fraction        | 0.0272      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.648      |
|    explained_variance   | 0.878       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.4        |
|    n_updates            | 2240        |
|    policy_gradient_loss | -0.00109    |
|    value_loss           | 23.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 280      |
|    mean_reward     | 254      |
| time/              |          |
|    total_timesteps | 195944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 326      |
|    ep_rew_mean     | 268      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 96       |
|    time_elapsed    | 908      |
|    total_timesteps | 196608   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 265          |
|    mean_reward          | 194          |
| time/                   |              |
|    total_timesteps      | 196944       |
| train/                  |              |
|    approx_kl            | 0.0035123099 |
|    clip_fraction        | 0.0534       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.681       |
|    explained_variance   | 0.792        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.93         |
|    n_updates            | 2250         |
|    policy_gradient_loss | -0.00166     |
|    value_loss           | 60.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 271      |
|    mean_reward     | 254      |
| time/              |          |
|    total_timesteps | 197944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 310      |
|    ep_rew_mean     | 269      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 97       |
|    time_elapsed    | 917      |
|    total_timesteps | 198656   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 241         |
|    mean_reward          | 230         |
| time/                   |             |
|    total_timesteps      | 198944      |
| train/                  |             |
|    approx_kl            | 0.004739701 |
|    clip_fraction        | 0.0334      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.618      |
|    explained_variance   | 0.643       |
|    learning_rate        | 0.0003      |
|    loss                 | 55.5        |
|    n_updates            | 2260        |
|    policy_gradient_loss | -0.00447    |
|    value_loss           | 166         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 322      |
|    mean_reward     | 236      |
| time/              |          |
|    total_timesteps | 199944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 302      |
|    ep_rew_mean     | 270      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 98       |
|    time_elapsed    | 926      |
|    total_timesteps | 200704   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 239         |
|    mean_reward          | 279         |
| time/                   |             |
|    total_timesteps      | 200944      |
| train/                  |             |
|    approx_kl            | 0.003906569 |
|    clip_fraction        | 0.039       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.639      |
|    explained_variance   | 0.84        |
|    learning_rate        | 0.0003      |
|    loss                 | 9.21        |
|    n_updates            | 2270        |
|    policy_gradient_loss | -0.00304    |
|    value_loss           | 69.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 240      |
|    mean_reward     | 275      |
| time/              |          |
|    total_timesteps | 201944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 300      |
|    ep_rew_mean     | 273      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 99       |
|    time_elapsed    | 935      |
|    total_timesteps | 202752   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 242         |
|    mean_reward          | 276         |
| time/                   |             |
|    total_timesteps      | 202944      |
| train/                  |             |
|    approx_kl            | 0.009676846 |
|    clip_fraction        | 0.0741      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.652      |
|    explained_variance   | 0.898       |
|    learning_rate        | 0.0003      |
|    loss                 | 25          |
|    n_updates            | 2280        |
|    policy_gradient_loss | -0.00603    |
|    value_loss           | 63.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 392      |
|    mean_reward     | 248      |
| time/              |          |
|    total_timesteps | 203944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 290      |
|    ep_rew_mean     | 275      |
| time/              |          |
|    fps             | 216      |
|    iterations      | 100      |
|    time_elapsed    | 944      |
|    total_timesteps | 204800   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 227        |
|    mean_reward          | 274        |
| time/                   |            |
|    total_timesteps      | 204944     |
| train/                  |            |
|    approx_kl            | 0.00840351 |
|    clip_fraction        | 0.0944     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.681     |
|    explained_variance   | 0.875      |
|    learning_rate        | 0.0003     |
|    loss                 | 12.4       |
|    n_updates            | 2290       |
|    policy_gradient_loss | -0.0023    |
|    value_loss           | 29.2       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 225      |
|    mean_reward     | 271      |
| time/              |          |
|    total_timesteps | 205944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 288      |
|    ep_rew_mean     | 271      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 101      |
|    time_elapsed    | 952      |
|    total_timesteps | 206848   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 294        |
|    mean_reward          | 264        |
| time/                   |            |
|    total_timesteps      | 206944     |
| train/                  |            |
|    approx_kl            | 0.00492484 |
|    clip_fraction        | 0.0722     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.651     |
|    explained_variance   | 0.784      |
|    learning_rate        | 0.0003     |
|    loss                 | 97         |
|    n_updates            | 2300       |
|    policy_gradient_loss | -0.0045    |
|    value_loss           | 114        |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 261      |
|    mean_reward     | 252      |
| time/              |          |
|    total_timesteps | 207944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 281      |
|    ep_rew_mean     | 270      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 102      |
|    time_elapsed    | 961      |
|    total_timesteps | 208896   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 233          |
|    mean_reward          | 264          |
| time/                   |              |
|    total_timesteps      | 208944       |
| train/                  |              |
|    approx_kl            | 0.0020826096 |
|    clip_fraction        | 0.0137       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.6         |
|    explained_variance   | 0.705        |
|    learning_rate        | 0.0003       |
|    loss                 | 55.4         |
|    n_updates            | 2310         |
|    policy_gradient_loss | -0.000432    |
|    value_loss           | 132          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 296      |
|    mean_reward     | 267      |
| time/              |          |
|    total_timesteps | 209944   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 393      |
|    mean_reward     | 254      |
| time/              |          |
|    total_timesteps | 210944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 271      |
|    ep_rew_mean     | 265      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 103      |
|    time_elapsed    | 971      |
|    total_timesteps | 210944   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 395          |
|    mean_reward          | 235          |
| time/                   |              |
|    total_timesteps      | 211944       |
| train/                  |              |
|    approx_kl            | 0.0018786353 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.656       |
|    explained_variance   | 0.722        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.6         |
|    n_updates            | 2320         |
|    policy_gradient_loss | -0.00148     |
|    value_loss           | 93.4         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 267      |
|    mean_reward     | 268      |
| time/              |          |
|    total_timesteps | 212944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 270      |
|    ep_rew_mean     | 263      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 104      |
|    time_elapsed    | 981      |
|    total_timesteps | 212992   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 233          |
|    mean_reward          | 271          |
| time/                   |              |
|    total_timesteps      | 213944       |
| train/                  |              |
|    approx_kl            | 0.0072555663 |
|    clip_fraction        | 0.0558       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.677       |
|    explained_variance   | 0.837        |
|    learning_rate        | 0.0003       |
|    loss                 | 95           |
|    n_updates            | 2330         |
|    policy_gradient_loss | -0.00326     |
|    value_loss           | 68           |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 244      |
|    mean_reward     | 252      |
| time/              |          |
|    total_timesteps | 214944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 265      |
|    ep_rew_mean     | 266      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 105      |
|    time_elapsed    | 989      |
|    total_timesteps | 215040   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 375          |
|    mean_reward          | 258          |
| time/                   |              |
|    total_timesteps      | 215944       |
| train/                  |              |
|    approx_kl            | 0.0044332137 |
|    clip_fraction        | 0.0356       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.579       |
|    explained_variance   | 0.831        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.44         |
|    n_updates            | 2340         |
|    policy_gradient_loss | -0.000397    |
|    value_loss           | 39.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 290      |
|    mean_reward     | 259      |
| time/              |          |
|    total_timesteps | 216944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 263      |
|    ep_rew_mean     | 266      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 106      |
|    time_elapsed    | 998      |
|    total_timesteps | 217088   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 228         |
|    mean_reward          | 262         |
| time/                   |             |
|    total_timesteps      | 217944      |
| train/                  |             |
|    approx_kl            | 0.017533898 |
|    clip_fraction        | 0.0815      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.619      |
|    explained_variance   | 0.668       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.7        |
|    n_updates            | 2350        |
|    policy_gradient_loss | -0.00641    |
|    value_loss           | 40.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 255      |
|    mean_reward     | 259      |
| time/              |          |
|    total_timesteps | 218944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 261      |
|    ep_rew_mean     | 267      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 107      |
|    time_elapsed    | 1007     |
|    total_timesteps | 219136   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 395         |
|    mean_reward          | 226         |
| time/                   |             |
|    total_timesteps      | 219944      |
| train/                  |             |
|    approx_kl            | 0.010235328 |
|    clip_fraction        | 0.0965      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.688      |
|    explained_variance   | 0.904       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.97        |
|    n_updates            | 2360        |
|    policy_gradient_loss | -0.0079     |
|    value_loss           | 23.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 422      |
|    mean_reward     | 241      |
| time/              |          |
|    total_timesteps | 220944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 263      |
|    ep_rew_mean     | 274      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 108      |
|    time_elapsed    | 1018     |
|    total_timesteps | 221184   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 242         |
|    mean_reward          | 261         |
| time/                   |             |
|    total_timesteps      | 221944      |
| train/                  |             |
|    approx_kl            | 0.005684398 |
|    clip_fraction        | 0.0424      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.595      |
|    explained_variance   | 0.874       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.5        |
|    n_updates            | 2370        |
|    policy_gradient_loss | -0.00211    |
|    value_loss           | 21.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 234      |
|    mean_reward     | 272      |
| time/              |          |
|    total_timesteps | 222944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 270      |
|    ep_rew_mean     | 274      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 109      |
|    time_elapsed    | 1026     |
|    total_timesteps | 223232   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 384         |
|    mean_reward          | 234         |
| time/                   |             |
|    total_timesteps      | 223944      |
| train/                  |             |
|    approx_kl            | 0.008039585 |
|    clip_fraction        | 0.0676      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.628      |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 7.55        |
|    n_updates            | 2380        |
|    policy_gradient_loss | -0.00169    |
|    value_loss           | 20          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 244      |
|    mean_reward     | 283      |
| time/              |          |
|    total_timesteps | 224944   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 273      |
|    ep_rew_mean     | 278      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 110      |
|    time_elapsed    | 1035     |
|    total_timesteps | 225280   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 245          |
|    mean_reward          | 276          |
| time/                   |              |
|    total_timesteps      | 225944       |
| train/                  |              |
|    approx_kl            | 0.0045874235 |
|    clip_fraction        | 0.0577       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.648       |
|    explained_variance   | 0.847        |
|    learning_rate        | 0.0003       |
|    loss                 | 21.5         |
|    n_updates            | 2390         |
|    policy_gradient_loss | -0.00206     |
|    value_loss           | 25.6         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 249      |
|    mean_reward     | 282      |
| time/              |          |
|    total_timesteps | 226944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 274      |
|    ep_rew_mean     | 284      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 111      |
|    time_elapsed    | 1043     |
|    total_timesteps | 227328   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 294         |
|    mean_reward          | 251         |
| time/                   |             |
|    total_timesteps      | 227944      |
| train/                  |             |
|    approx_kl            | 0.008453184 |
|    clip_fraction        | 0.0802      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.666      |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.8        |
|    n_updates            | 2400        |
|    policy_gradient_loss | -0.000623   |
|    value_loss           | 16.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 234      |
|    mean_reward     | 278      |
| time/              |          |
|    total_timesteps | 228944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 280      |
|    ep_rew_mean     | 284      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 112      |
|    time_elapsed    | 1052     |
|    total_timesteps | 229376   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 381         |
|    mean_reward          | 271         |
| time/                   |             |
|    total_timesteps      | 229944      |
| train/                  |             |
|    approx_kl            | 0.004221967 |
|    clip_fraction        | 0.0419      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.598      |
|    explained_variance   | 0.945       |
|    learning_rate        | 0.0003      |
|    loss                 | 15.9        |
|    n_updates            | 2410        |
|    policy_gradient_loss | -0.00208    |
|    value_loss           | 39.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 256      |
|    mean_reward     | 263      |
| time/              |          |
|    total_timesteps | 230944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 283      |
|    ep_rew_mean     | 282      |
| time/              |          |
|    fps             | 217      |
|    iterations      | 113      |
|    time_elapsed    | 1062     |
|    total_timesteps | 231424   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 255         |
|    mean_reward          | 277         |
| time/                   |             |
|    total_timesteps      | 231944      |
| train/                  |             |
|    approx_kl            | 0.005275789 |
|    clip_fraction        | 0.0873      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.629      |
|    explained_variance   | 0.992       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.23        |
|    n_updates            | 2420        |
|    policy_gradient_loss | -0.00608    |
|    value_loss           | 13          |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 246      |
|    mean_reward     | 279      |
| time/              |          |
|    total_timesteps | 232944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 286      |
|    ep_rew_mean     | 279      |
| time/              |          |
|    fps             | 218      |
|    iterations      | 114      |
|    time_elapsed    | 1070     |
|    total_timesteps | 233472   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 244          |
|    mean_reward          | 282          |
| time/                   |              |
|    total_timesteps      | 233944       |
| train/                  |              |
|    approx_kl            | 0.0050579654 |
|    clip_fraction        | 0.031        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.621       |
|    explained_variance   | 0.827        |
|    learning_rate        | 0.0003       |
|    loss                 | 63.9         |
|    n_updates            | 2430         |
|    policy_gradient_loss | -0.00338     |
|    value_loss           | 223          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 237      |
|    mean_reward     | 271      |
| time/              |          |
|    total_timesteps | 234944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 287      |
|    ep_rew_mean     | 281      |
| time/              |          |
|    fps             | 218      |
|    iterations      | 115      |
|    time_elapsed    | 1078     |
|    total_timesteps | 235520   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 278         |
|    mean_reward          | 273         |
| time/                   |             |
|    total_timesteps      | 235944      |
| train/                  |             |
|    approx_kl            | 0.003760715 |
|    clip_fraction        | 0.0308      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.647      |
|    explained_variance   | 0.8         |
|    learning_rate        | 0.0003      |
|    loss                 | 16.5        |
|    n_updates            | 2440        |
|    policy_gradient_loss | -0.00405    |
|    value_loss           | 36.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 245      |
|    mean_reward     | 270      |
| time/              |          |
|    total_timesteps | 236944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 289      |
|    ep_rew_mean     | 281      |
| time/              |          |
|    fps             | 218      |
|    iterations      | 116      |
|    time_elapsed    | 1087     |
|    total_timesteps | 237568   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 250          |
|    mean_reward          | 270          |
| time/                   |              |
|    total_timesteps      | 237944       |
| train/                  |              |
|    approx_kl            | 0.0051047746 |
|    clip_fraction        | 0.0816       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.626       |
|    explained_variance   | 0.76         |
|    learning_rate        | 0.0003       |
|    loss                 | 37.1         |
|    n_updates            | 2450         |
|    policy_gradient_loss | -0.00379     |
|    value_loss           | 118          |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 248      |
|    mean_reward     | 262      |
| time/              |          |
|    total_timesteps | 238944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 292      |
|    ep_rew_mean     | 288      |
| time/              |          |
|    fps             | 218      |
|    iterations      | 117      |
|    time_elapsed    | 1095     |
|    total_timesteps | 239616   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 233         |
|    mean_reward          | 286         |
| time/                   |             |
|    total_timesteps      | 239944      |
| train/                  |             |
|    approx_kl            | 0.018588576 |
|    clip_fraction        | 0.0967      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.678      |
|    explained_variance   | 0.886       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.87        |
|    n_updates            | 2460        |
|    policy_gradient_loss | -0.00319    |
|    value_loss           | 14.8        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 231      |
|    mean_reward     | 288      |
| time/              |          |
|    total_timesteps | 240944   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 289      |
|    ep_rew_mean     | 288      |
| time/              |          |
|    fps             | 218      |
|    iterations      | 118      |
|    time_elapsed    | 1103     |
|    total_timesteps | 241664   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 236         |
|    mean_reward          | 268         |
| time/                   |             |
|    total_timesteps      | 241944      |
| train/                  |             |
|    approx_kl            | 0.005731873 |
|    clip_fraction        | 0.0661      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.635      |
|    explained_variance   | 0.847       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.98        |
|    n_updates            | 2470        |
|    policy_gradient_loss | -0.00161    |
|    value_loss           | 82.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 238      |
|    mean_reward     | 280      |
| time/              |          |
|    total_timesteps | 242944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 293      |
|    ep_rew_mean     | 285      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 119      |
|    time_elapsed    | 1112     |
|    total_timesteps | 243712   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 247          |
|    mean_reward          | 275          |
| time/                   |              |
|    total_timesteps      | 243944       |
| train/                  |              |
|    approx_kl            | 0.0065952153 |
|    clip_fraction        | 0.0883       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.603       |
|    explained_variance   | 0.881        |
|    learning_rate        | 0.0003       |
|    loss                 | 19.3         |
|    n_updates            | 2480         |
|    policy_gradient_loss | -0.000445    |
|    value_loss           | 91.3         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 217      |
|    mean_reward     | 229      |
| time/              |          |
|    total_timesteps | 244944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 287      |
|    ep_rew_mean     | 284      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 120      |
|    time_elapsed    | 1120     |
|    total_timesteps | 245760   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 236          |
|    mean_reward          | 265          |
| time/                   |              |
|    total_timesteps      | 245944       |
| train/                  |              |
|    approx_kl            | 0.0044519673 |
|    clip_fraction        | 0.0372       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.634       |
|    explained_variance   | 0.843        |
|    learning_rate        | 0.0003       |
|    loss                 | 34           |
|    n_updates            | 2490         |
|    policy_gradient_loss | -0.00271     |
|    value_loss           | 43.1         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 230      |
|    mean_reward     | 276      |
| time/              |          |
|    total_timesteps | 246944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 283      |
|    ep_rew_mean     | 283      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 121      |
|    time_elapsed    | 1128     |
|    total_timesteps | 247808   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 242          |
|    mean_reward          | 272          |
| time/                   |              |
|    total_timesteps      | 247944       |
| train/                  |              |
|    approx_kl            | 0.0030533732 |
|    clip_fraction        | 0.0644       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.615       |
|    explained_variance   | 0.888        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.42         |
|    n_updates            | 2500         |
|    policy_gradient_loss | -0.00252     |
|    value_loss           | 22.8         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 233      |
|    mean_reward     | 265      |
| time/              |          |
|    total_timesteps | 248944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 282      |
|    ep_rew_mean     | 281      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 122      |
|    time_elapsed    | 1136     |
|    total_timesteps | 249856   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 241          |
|    mean_reward          | 272          |
| time/                   |              |
|    total_timesteps      | 249944       |
| train/                  |              |
|    approx_kl            | 0.0031139052 |
|    clip_fraction        | 0.058        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.543       |
|    explained_variance   | 0.748        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.66         |
|    n_updates            | 2510         |
|    policy_gradient_loss | -0.00125     |
|    value_loss           | 96.7         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 358      |
|    mean_reward     | 215      |
| time/              |          |
|    total_timesteps | 250944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 274      |
|    ep_rew_mean     | 277      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 123      |
|    time_elapsed    | 1145     |
|    total_timesteps | 251904   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 236         |
|    mean_reward          | 263         |
| time/                   |             |
|    total_timesteps      | 251944      |
| train/                  |             |
|    approx_kl            | 0.003870957 |
|    clip_fraction        | 0.0364      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.624      |
|    explained_variance   | 0.754       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.2        |
|    n_updates            | 2520        |
|    policy_gradient_loss | -0.00198    |
|    value_loss           | 85.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 236      |
|    mean_reward     | 273      |
| time/              |          |
|    total_timesteps | 252944   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 246      |
|    mean_reward     | 265      |
| time/              |          |
|    total_timesteps | 253944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 272      |
|    ep_rew_mean     | 274      |
| time/              |          |
|    fps             | 219      |
|    iterations      | 124      |
|    time_elapsed    | 1154     |
|    total_timesteps | 253952   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 246          |
|    mean_reward          | 264          |
| time/                   |              |
|    total_timesteps      | 254944       |
| train/                  |              |
|    approx_kl            | 0.0037183997 |
|    clip_fraction        | 0.0396       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.592       |
|    explained_variance   | 0.706        |
|    learning_rate        | 0.0003       |
|    loss                 | 11           |
|    n_updates            | 2530         |
|    policy_gradient_loss | -0.00133     |
|    value_loss           | 45.9         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 227      |
|    mean_reward     | 268      |
| time/              |          |
|    total_timesteps | 255944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 266      |
|    ep_rew_mean     | 274      |
| time/              |          |
|    fps             | 220      |
|    iterations      | 125      |
|    time_elapsed    | 1162     |
|    total_timesteps | 256000   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 240        |
|    mean_reward          | 260        |
| time/                   |            |
|    total_timesteps      | 256944     |
| train/                  |            |
|    approx_kl            | 0.00524404 |
|    clip_fraction        | 0.0497     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.665     |
|    explained_variance   | 0.827      |
|    learning_rate        | 0.0003     |
|    loss                 | 18         |
|    n_updates            | 2540       |
|    policy_gradient_loss | -0.00515   |
|    value_loss           | 132        |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 393      |
|    mean_reward     | 249      |
| time/              |          |
|    total_timesteps | 257944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 262      |
|    ep_rew_mean     | 276      |
| time/              |          |
|    fps             | 220      |
|    iterations      | 126      |
|    time_elapsed    | 1172     |
|    total_timesteps | 258048   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 228          |
|    mean_reward          | 274          |
| time/                   |              |
|    total_timesteps      | 258944       |
| train/                  |              |
|    approx_kl            | 0.0076764943 |
|    clip_fraction        | 0.0479       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.634       |
|    explained_variance   | 0.906        |
|    learning_rate        | 0.0003       |
|    loss                 | 38.5         |
|    n_updates            | 2550         |
|    policy_gradient_loss | -0.00252     |
|    value_loss           | 37.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 242      |
|    mean_reward     | 264      |
| time/              |          |
|    total_timesteps | 259944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 265      |
|    ep_rew_mean     | 281      |
| time/              |          |
|    fps             | 220      |
|    iterations      | 127      |
|    time_elapsed    | 1180     |
|    total_timesteps | 260096   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 220         |
|    mean_reward          | 288         |
| time/                   |             |
|    total_timesteps      | 260944      |
| train/                  |             |
|    approx_kl            | 0.006195317 |
|    clip_fraction        | 0.0598      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | 0.94        |
|    learning_rate        | 0.0003      |
|    loss                 | 32.1        |
|    n_updates            | 2560        |
|    policy_gradient_loss | -0.00394    |
|    value_loss           | 45.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 242      |
|    mean_reward     | 282      |
| time/              |          |
|    total_timesteps | 261944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 267      |
|    ep_rew_mean     | 283      |
| time/              |          |
|    fps             | 220      |
|    iterations      | 128      |
|    time_elapsed    | 1188     |
|    total_timesteps | 262144   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 236          |
|    mean_reward          | 279          |
| time/                   |              |
|    total_timesteps      | 262944       |
| train/                  |              |
|    approx_kl            | 0.0063874153 |
|    clip_fraction        | 0.0529       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.617       |
|    explained_variance   | 0.849        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.9         |
|    n_updates            | 2570         |
|    policy_gradient_loss | -0.000537    |
|    value_loss           | 34.5         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 235      |
|    mean_reward     | 265      |
| time/              |          |
|    total_timesteps | 263944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 268      |
|    ep_rew_mean     | 287      |
| time/              |          |
|    fps             | 220      |
|    iterations      | 129      |
|    time_elapsed    | 1196     |
|    total_timesteps | 264192   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 238          |
|    mean_reward          | 289          |
| time/                   |              |
|    total_timesteps      | 264944       |
| train/                  |              |
|    approx_kl            | 0.0019348162 |
|    clip_fraction        | 0.0271       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.592       |
|    explained_variance   | 0.877        |
|    learning_rate        | 0.0003       |
|    loss                 | 10.4         |
|    n_updates            | 2580         |
|    policy_gradient_loss | -0.00127     |
|    value_loss           | 24           |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 279      |
|    mean_reward     | 266      |
| time/              |          |
|    total_timesteps | 265944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 268      |
|    ep_rew_mean     | 286      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 130      |
|    time_elapsed    | 1204     |
|    total_timesteps | 266240   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 250          |
|    mean_reward          | 272          |
| time/                   |              |
|    total_timesteps      | 266944       |
| train/                  |              |
|    approx_kl            | 0.0061136186 |
|    clip_fraction        | 0.0624       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.599       |
|    explained_variance   | 0.889        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.49         |
|    n_updates            | 2590         |
|    policy_gradient_loss | -0.00277     |
|    value_loss           | 27.8         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 240      |
|    mean_reward     | 276      |
| time/              |          |
|    total_timesteps | 267944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 269      |
|    ep_rew_mean     | 285      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 131      |
|    time_elapsed    | 1213     |
|    total_timesteps | 268288   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 245         |
|    mean_reward          | 277         |
| time/                   |             |
|    total_timesteps      | 268944      |
| train/                  |             |
|    approx_kl            | 0.004283173 |
|    clip_fraction        | 0.0465      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.629      |
|    explained_variance   | 0.717       |
|    learning_rate        | 0.0003      |
|    loss                 | 40.1        |
|    n_updates            | 2600        |
|    policy_gradient_loss | -0.000853   |
|    value_loss           | 70.9        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 240      |
|    mean_reward     | 266      |
| time/              |          |
|    total_timesteps | 269944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 264      |
|    ep_rew_mean     | 286      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 132      |
|    time_elapsed    | 1221     |
|    total_timesteps | 270336   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 243         |
|    mean_reward          | 275         |
| time/                   |             |
|    total_timesteps      | 270944      |
| train/                  |             |
|    approx_kl            | 0.003869761 |
|    clip_fraction        | 0.056       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.607      |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.11        |
|    n_updates            | 2610        |
|    policy_gradient_loss | -0.00509    |
|    value_loss           | 20.4        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 288      |
|    mean_reward     | 269      |
| time/              |          |
|    total_timesteps | 271944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 264      |
|    ep_rew_mean     | 287      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 133      |
|    time_elapsed    | 1229     |
|    total_timesteps | 272384   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 222          |
|    mean_reward          | 273          |
| time/                   |              |
|    total_timesteps      | 272944       |
| train/                  |              |
|    approx_kl            | 0.0054229223 |
|    clip_fraction        | 0.054        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.637       |
|    explained_variance   | 0.714        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.15         |
|    n_updates            | 2620         |
|    policy_gradient_loss | -0.00285     |
|    value_loss           | 53.3         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 264      |
|    mean_reward     | 270      |
| time/              |          |
|    total_timesteps | 273944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 275      |
|    ep_rew_mean     | 289      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 134      |
|    time_elapsed    | 1238     |
|    total_timesteps | 274432   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 237         |
|    mean_reward          | 259         |
| time/                   |             |
|    total_timesteps      | 274944      |
| train/                  |             |
|    approx_kl            | 0.005039095 |
|    clip_fraction        | 0.046       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.653      |
|    explained_variance   | 0.662       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.68        |
|    n_updates            | 2630        |
|    policy_gradient_loss | -0.00275    |
|    value_loss           | 20.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 260      |
|    mean_reward     | 270      |
| time/              |          |
|    total_timesteps | 275944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 281      |
|    ep_rew_mean     | 293      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 135      |
|    time_elapsed    | 1246     |
|    total_timesteps | 276480   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 350         |
|    mean_reward          | 274         |
| time/                   |             |
|    total_timesteps      | 276944      |
| train/                  |             |
|    approx_kl            | 0.005627361 |
|    clip_fraction        | 0.0647      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.635      |
|    explained_variance   | 0.943       |
|    learning_rate        | 0.0003      |
|    loss                 | 48.6        |
|    n_updates            | 2640        |
|    policy_gradient_loss | -0.0025     |
|    value_loss           | 107         |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 224      |
|    mean_reward     | 270      |
| time/              |          |
|    total_timesteps | 277944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 296      |
|    ep_rew_mean     | 292      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 136      |
|    time_elapsed    | 1255     |
|    total_timesteps | 278528   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 235         |
|    mean_reward          | 273         |
| time/                   |             |
|    total_timesteps      | 278944      |
| train/                  |             |
|    approx_kl            | 0.004294297 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.525      |
|    explained_variance   | 0.965       |
|    learning_rate        | 0.0003      |
|    loss                 | 105         |
|    n_updates            | 2650        |
|    policy_gradient_loss | 0.000574    |
|    value_loss           | 63.5        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 386      |
|    mean_reward     | 251      |
| time/              |          |
|    total_timesteps | 279944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 298      |
|    ep_rew_mean     | 293      |
| time/              |          |
|    fps             | 221      |
|    iterations      | 137      |
|    time_elapsed    | 1264     |
|    total_timesteps | 280576   |
---------------------------------


----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 243        |
|    mean_reward          | 280        |
| time/                   |            |
|    total_timesteps      | 280944     |
| train/                  |            |
|    approx_kl            | 0.00479874 |
|    clip_fraction        | 0.0386     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.606     |
|    explained_variance   | 0.847      |
|    learning_rate        | 0.0003     |
|    loss                 | 11.6       |
|    n_updates            | 2660       |
|    policy_gradient_loss | -0.00376   |
|    value_loss           | 53.4       |
----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 237      |
|    mean_reward     | 280      |
| time/              |          |
|    total_timesteps | 281944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 297      |
|    ep_rew_mean     | 295      |
| time/              |          |
|    fps             | 222      |
|    iterations      | 138      |
|    time_elapsed    | 1272     |
|    total_timesteps | 282624   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 254         |
|    mean_reward          | 265         |
| time/                   |             |
|    total_timesteps      | 282944      |
| train/                  |             |
|    approx_kl            | 0.003544339 |
|    clip_fraction        | 0.0364      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.618      |
|    explained_variance   | 0.86        |
|    learning_rate        | 0.0003      |
|    loss                 | 11.5        |
|    n_updates            | 2670        |
|    policy_gradient_loss | -0.00299    |
|    value_loss           | 31.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 244      |
|    mean_reward     | 281      |
| time/              |          |
|    total_timesteps | 283944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 300      |
|    ep_rew_mean     | 297      |
| time/              |          |
|    fps             | 222      |
|    iterations      | 139      |
|    time_elapsed    | 1280     |
|    total_timesteps | 284672   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 227         |
|    mean_reward          | 279         |
| time/                   |             |
|    total_timesteps      | 284944      |
| train/                  |             |
|    approx_kl            | 0.007482299 |
|    clip_fraction        | 0.0679      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.626      |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.2        |
|    n_updates            | 2680        |
|    policy_gradient_loss | -0.00596    |
|    value_loss           | 43.7        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 248      |
|    mean_reward     | 288      |
| time/              |          |
|    total_timesteps | 285944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 302      |
|    ep_rew_mean     | 295      |
| time/              |          |
|    fps             | 222      |
|    iterations      | 140      |
|    time_elapsed    | 1288     |
|    total_timesteps | 286720   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 227         |
|    mean_reward          | 273         |
| time/                   |             |
|    total_timesteps      | 286944      |
| train/                  |             |
|    approx_kl            | 0.024802756 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.563      |
|    explained_variance   | 0.988       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.17        |
|    n_updates            | 2690        |
|    policy_gradient_loss | -0.00458    |
|    value_loss           | 12.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 282      |
|    mean_reward     | 264      |
| time/              |          |
|    total_timesteps | 287944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 295      |
|    ep_rew_mean     | 293      |
| time/              |          |
|    fps             | 222      |
|    iterations      | 141      |
|    time_elapsed    | 1297     |
|    total_timesteps | 288768   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 231          |
|    mean_reward          | 277          |
| time/                   |              |
|    total_timesteps      | 288944       |
| train/                  |              |
|    approx_kl            | 0.0053924336 |
|    clip_fraction        | 0.0536       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.608       |
|    explained_variance   | 0.866        |
|    learning_rate        | 0.0003       |
|    loss                 | 15.8         |
|    n_updates            | 2700         |
|    policy_gradient_loss | -0.00423     |
|    value_loss           | 42           |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 243      |
|    mean_reward     | 286      |
| time/              |          |
|    total_timesteps | 289944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 294      |
|    ep_rew_mean     | 292      |
| time/              |          |
|    fps             | 222      |
|    iterations      | 142      |
|    time_elapsed    | 1305     |
|    total_timesteps | 290816   |
---------------------------------


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 224          |
|    mean_reward          | 282          |
| time/                   |              |
|    total_timesteps      | 290944       |
| train/                  |              |
|    approx_kl            | 0.0067748846 |
|    clip_fraction        | 0.0565       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.649       |
|    explained_variance   | 0.844        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.64         |
|    n_updates            | 2710         |
|    policy_gradient_loss | -0.00283     |
|    value_loss           | 26.8         |
------------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 219      |
|    mean_reward     | 281      |
| time/              |          |
|    total_timesteps | 291944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 293      |
|    ep_rew_mean     | 289      |
| time/              |          |
|    fps             | 223      |
|    iterations      | 143      |
|    time_elapsed    | 1313     |
|    total_timesteps | 292864   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 224         |
|    mean_reward          | 266         |
| time/                   |             |
|    total_timesteps      | 292944      |
| train/                  |             |
|    approx_kl            | 0.004409987 |
|    clip_fraction        | 0.0341      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.565      |
|    explained_variance   | 0.731       |
|    learning_rate        | 0.0003      |
|    loss                 | 60.7        |
|    n_updates            | 2720        |
|    policy_gradient_loss | -0.00156    |
|    value_loss           | 85.6        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 206      |
|    mean_reward     | 225      |
| time/              |          |
|    total_timesteps | 293944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 292      |
|    ep_rew_mean     | 288      |
| time/              |          |
|    fps             | 223      |
|    iterations      | 144      |
|    time_elapsed    | 1321     |
|    total_timesteps | 294912   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 231         |
|    mean_reward          | 283         |
| time/                   |             |
|    total_timesteps      | 294944      |
| train/                  |             |
|    approx_kl            | 0.006610321 |
|    clip_fraction        | 0.0709      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.573      |
|    explained_variance   | 0.984       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.27        |
|    n_updates            | 2730        |
|    policy_gradient_loss | -0.00158    |
|    value_loss           | 15.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 233      |
|    mean_reward     | 280      |
| time/              |          |
|    total_timesteps | 295944   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 242      |
|    mean_reward     | 273      |
| time/              |          |
|    total_timesteps | 296944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 301      |
|    ep_rew_mean     | 292      |
| time/              |          |
|    fps             | 223      |
|    iterations      | 145      |
|    time_elapsed    | 1330     |
|    total_timesteps | 296960   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 218         |
|    mean_reward          | 285         |
| time/                   |             |
|    total_timesteps      | 297944      |
| train/                  |             |
|    approx_kl            | 0.005334951 |
|    clip_fraction        | 0.067       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.605      |
|    explained_variance   | 0.886       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.6        |
|    n_updates            | 2740        |
|    policy_gradient_loss | -0.00157    |
|    value_loss           | 31.1        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 222      |
|    mean_reward     | 293      |
| time/              |          |
|    total_timesteps | 298944   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 299      |
|    ep_rew_mean     | 290      |
| time/              |          |
|    fps             | 223      |
|    iterations      | 146      |
|    time_elapsed    | 1338     |
|    total_timesteps | 299008   |
---------------------------------


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 328         |
|    mean_reward          | 249         |
| time/                   |             |
|    total_timesteps      | 299944      |
| train/                  |             |
|    approx_kl            | 0.008010677 |
|    clip_fraction        | 0.0857      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.679      |
|    explained_variance   | 0.927       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.75        |
|    n_updates            | 2750        |
|    policy_gradient_loss | -0.00188    |
|    value_loss           | 14.2        |
-----------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 235      |
|    mean_reward     | 279      |
| time/              |          |
|    total_timesteps | 300944   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 300      |
|    ep_rew_mean     | 293      |
| time/              |          |
|    fps             | 223      |
|    iterations      | 147      |
|    time_elapsed    | 1347     |
|    total_timesteps | 301056   |
---------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7fddea2bd1b0>

: 

In [20]:
model = PPO.load("./lunar_lander_logs/best_model.zip", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


## Putting through the fire!

In [7]:
# Enjoy trained agent
total_reward = 0
vec_env = model.get_env()
obs = vec_env.reset()
for i in range(10_000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    total_reward += rewards

    vec_env.render("human")

print(f"Reward: {total_reward}")
vec_env.close()

Reward: [11530.818]
