In [45]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import feature_manager as fma
import rl.env_simple_crypto_trade as env
import importlib
import rl.models as rla
import config as cf
import numpy as np
from torch.nn import Tanh
import tr_utils

In [None]:
# Re-execute feature_manager so edits made to the module since the kernel
# started are picked up without a restart.
importlib.reload(fma)

# Import BTCUSDT data for the 1d, 1w and 1mo timeframes.
fm = fma.FeatureManager()
fm.import_data(timeframes=["1d", "1w", "1mo"], symbol="BTCUSDT")

# Three lag values are supplied, matching the three imported timeframes
# (presumably one per timeframe -- confirm in FeatureManager.build_features).
# NOTE(review): the indicator set is named FOR_4H_INDICATORS although no 4h
# timeframe is imported here -- confirm this is the intended feature list.
fm.build_features(features=cf.FOR_4H_INDICATORS, lags=[1, 1, 1])

In [44]:
# Quick size check: length of fm.df, the data that is later passed to the
# trading environment as its `df` argument.
len(fm.df)

1005

In [None]:
# Refresh the environment and config modules so local edits take effect
# without restarting the kernel.
importlib.reload(env)
importlib.reload(cf)

# Shared environment settings come from the project config.
env_kwargs = cf.TRADE_ENV_PARAMETER

# state_space is the number of feature columns plus a fixed offset of 10.
# NOTE(review): the meaning of the extra 10 entries is defined inside
# CryptoTradingEnv (presumably account/position state) -- confirm there.
state_space = len(fm.cols) + 10

# Environment over fm.df, trading on the daily timeframe.
full_env = env.CryptoTradingEnv(
    df=fm.df,
    indicators=fm.cols,
    state_space=state_space,
    trade_timeframe="1d",
    **env_kwargs,
)

In [None]:
# Length of the frame held by the environment, e.g. to compare against
# len(fm.df) and see whether the environment kept every row.
len(full_env.df)

In [None]:
# Reload rl.models first so the agent is created from the module's current
# source rather than the version imported at kernel start.
importlib.reload(rla)
# Build the agent helper around full_env; later cells use it to create and
# train models.
agent = rla.DRLTradeAgent(env=full_env)

In [None]:
importlib.reload(rla)  # refresh rl.models before building the model

# --- Disabled alternative: DQN configuration (kept for reference) ---------
# DQN_PARAMS = {
#     "learning_rate": 1e-4,
#     "buffer_size": 100_000,
#     "learning_starts": 100_000,
#     "batch_size": 64,
#     "tau": 1.0,
#     "gamma": 0.9999,
#     "train_freq": 4,
#     "target_update_interval": 10000,
#     "exploration_fraction": 0.3,
#     "exploration_initial_eps": 1.0,
#     "exploration_final_eps": 0.05,
# }
#
# catalog_name = tr_utils.get_name_with_kwargs("dqn",DQN_PARAMS)
#
# dqn_model = agent.get_model(
#     model_name="dqn",
#     model_kwargs=DQN_PARAMS,
#     seed=100,
#     tensorboard_log=catalog_name
# )

# --- Active configuration: PPO --------------------------------------------
# Hyperparameters handed to agent.get_model as `model_kwargs`. Key order is
# kept as-is because the same dict is also used to derive catalog_name.
PPO_MODEL_PARAMS = dict(
    gamma=0.99,
    gae_lambda=0.95,
    n_steps=1024,
    ent_coef=0.03,
    learning_rate=0.0002,
    batch_size=64,
)

# Policy settings handed to agent.get_model as `policy_kwargs`: Tanh
# activation and separate `pi` / `vf` entries of [128] each (assuming
# Stable-Baselines3 net_arch semantics, i.e. one 128-unit layer per head --
# confirm against rl.models).
PPO_POLICY_PARAMS = dict(
    activation_fn=Tanh,
    net_arch={"pi": [128], "vf": [128]},
)

# Name built from the "1d_" prefix and the PPO hyperparameters; it is used
# as the tensorboard log target here and as the catalog name when training.
catalog_name = tr_utils.get_name_with_kwargs("1d_", PPO_MODEL_PARAMS)

ppo_model = agent.get_model(
    model_name="ppo",
    seed=100,
    model_kwargs=PPO_MODEL_PARAMS,
    policy_kwargs=PPO_POLICY_PARAMS,
    tensorboard_log=catalog_name,
)


In [None]:
# Display the generated run name that is passed as tensorboard_log and as
# the training catalog_name.
catalog_name

In [None]:
# Inspect the policy kwargs stored on the created model, e.g. to verify that
# PPO_POLICY_PARAMS was applied.
ppo_model.policy_kwargs

In [43]:
# Bind the model to train under a generic name; if training is interrupted,
# `selected_model` still refers to the (partially trained) ppo_model object.
selected_model = ppo_model

# Train for 1,000,000 timesteps with checkpointing enabled and a progress bar.
# save_frequency=20_000 is presumably the checkpoint interval in timesteps and
# catalog_name the location/name checkpoints are stored under -- confirm in
# DRLTradeAgent.train_model.
selected_model = agent.train_model(
    model = selected_model,
    total_timesteps = 1_000_000,
    checkpoint = True,
    catalog_name = catalog_name,
    save_frequency = 20_000,
    progress_bar = True
)

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.489         |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 89            |
|    time_elapsed         | 553           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 0.00027597573 |
|    clip_fraction        | 0.00889       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.232        |
|    explained_variance   | 0.00154       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000962     |
|    n_updates            | 880           |
|    policy_gradient_loss | -0.000453     |
|    reward               | 0.0           |
|    value_loss           | 0.0184        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.49         |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 91           |
|    time_elapsed         | 566          |
|    total_timesteps      | 93184        |
| train/                  |              |
|    approx_kl            | 0.0007260723 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.19        |
|    explained_variance   | 0.000504     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0141       |
|    n_updates            | 900          |
|    policy_gradient_loss | -0.00178     |
|    reward               | 0.0040146452 |
|    value_loss           | 0.0324       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.493        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 92           |
|    time_elapsed         | 571          |
|    total_timesteps      | 94208        |
| train/                  |              |
|    approx_kl            | 0.0010669219 |
|    clip_fraction        | 0.0181       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.197       |
|    explained_variance   | 0.000348     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0224       |
|    n_updates            | 910          |
|    policy_gradient_loss | -0.00196     |
|    reward               | 0.0          |
|    value_loss           | 0.0508       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.497        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 93           |
|    time_elapsed         | 578          |
|    total_timesteps      | 95232        |
| train/                  |              |
|    approx_kl            | 0.0012172242 |
|    clip_fraction        | 0.0145       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.204       |
|    explained_variance   | 0.000125     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00991      |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.000922    |
|    reward               | 0.0          |
|    value_loss           | 0.0248       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.503        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 94           |
|    time_elapsed         | 584          |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 0.0011271774 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.00057      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0012      |
|    n_updates            | 930          |
|    policy_gradient_loss | -0.00196     |
|    reward               | 0.025579385  |
|    value_loss           | 0.023        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.506         |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 95            |
|    time_elapsed         | 590           |
|    total_timesteps      | 97280         |
| train/                  |               |
|    approx_kl            | 0.00039726606 |
|    clip_fraction        | 0.0125        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.204        |
|    explained_variance   | 0.0013        |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00334      |
|    n_updates            | 940           |
|    policy_gradient_loss | -0.00185      |
|    reward               | 0.0           |
|    value_loss           | 0.0261        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.508        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 96           |
|    time_elapsed         | 596          |
|    total_timesteps      | 98304        |
| train/                  |              |
|    approx_kl            | 8.172827e-05 |
|    clip_fraction        | 0.00762      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.217       |
|    explained_variance   | 0.000595     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00931      |
|    n_updates            | 950          |
|    policy_gradient_loss | 0.000782     |
|    reward               | 0.0          |
|    value_loss           | 0.0319       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.51         |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 98           |
|    time_elapsed         | 608          |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0010252735 |
|    clip_fraction        | 0.00518      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.000725     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00386      |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.000875    |
|    reward               | 0.0          |
|    value_loss           | 0.0284       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.513         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 99            |
|    time_elapsed         | 614           |
|    total_timesteps      | 101376        |
| train/                  |               |
|    approx_kl            | 0.00091087987 |
|    clip_fraction        | 0.0041        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.187        |
|    explained_variance   | 0.000474      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00401       |
|    n_updates            | 980           |
|    policy_gradient_loss | -0.000365     |
|    reward               | 0.008007298   |
|    value_loss           | 0.0286        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.527         |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 100           |
|    time_elapsed         | 620           |
|    total_timesteps      | 102400        |
| train/                  |               |
|    approx_kl            | 0.00070868985 |
|    clip_fraction        | 0.0133        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.166        |
|    explained_variance   | 0.000332      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000684      |
|    n_updates            | 990           |
|    policy_gradient_loss | -0.00186      |
|    reward               | -0.11494717   |
|    value_loss           | 0.0221        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.544         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 101           |
|    time_elapsed         | 626           |
|    total_timesteps      | 103424        |
| train/                  |               |
|    approx_kl            | 0.00048667943 |
|    clip_fraction        | 0.0126        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.201        |
|    explained_variance   | -0.000448     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00974       |
|    n_updates            | 1000          |
|    policy_gradient_loss | 0.000348      |
|    reward               | 0.0           |
|    value_loss           | 0.0231        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.558         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 102           |
|    time_elapsed         | 632           |
|    total_timesteps      | 104448        |
| train/                  |               |
|    approx_kl            | 0.00070079725 |
|    clip_fraction        | 0.0167        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.239        |
|    explained_variance   | 0.000175      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00423       |
|    n_updates            | 1010          |
|    policy_gradient_loss | 0.000607      |
|    reward               | 0.005768965   |
|    value_loss           | 0.0273        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.565        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 103          |
|    time_elapsed         | 638          |
|    total_timesteps      | 105472       |
| train/                  |              |
|    approx_kl            | 0.0011548745 |
|    clip_fraction        | 0.0152       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.262       |
|    explained_variance   | 0.000807     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00577      |
|    n_updates            | 1020         |
|    policy_gradient_loss | 2.63e-05     |
|    reward               | -0.47933006  |
|    value_loss           | 0.0246       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.57         |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 104          |
|    time_elapsed         | 645          |
|    total_timesteps      | 106496       |
| train/                  |              |
|    approx_kl            | 0.0002758545 |
|    clip_fraction        | 0.00625      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.289       |
|    explained_variance   | 0.00213      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00371      |
|    n_updates            | 1030         |
|    policy_gradient_loss | 0.001        |
|    reward               | 0.0          |
|    value_loss           | 0.0231       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.567        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 105          |
|    time_elapsed         | 651          |
|    total_timesteps      | 107520       |
| train/                  |              |
|    approx_kl            | 0.0017574768 |
|    clip_fraction        | 0.024        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | 0.00112      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00399     |
|    n_updates            | 1040         |
|    policy_gradient_loss | -0.00393     |
|    reward               | 0.007780072  |
|    value_loss           | 0.0183       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.614       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 106         |
|    time_elapsed         | 657         |
|    total_timesteps      | 108544      |
| train/                  |             |
|    approx_kl            | 0.001142873 |
|    clip_fraction        | 0.0275      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.275      |
|    explained_variance   | 0.000446    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0133      |
|    n_updates            | 1050        |
|    policy_gradient_loss | -0.000823   |
|    reward               | 0.0         |
|    value_loss           | 0.0324      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.628        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 107          |
|    time_elapsed         | 663          |
|    total_timesteps      | 109568       |
| train/                  |              |
|    approx_kl            | 0.0013255901 |
|    clip_fraction        | 0.015        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.261       |
|    explained_variance   | -0.000478    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0143      |
|    n_updates            | 1060         |
|    policy_gradient_loss | -0.00167     |
|    reward               | 0.002715485  |
|    value_loss           | 0.0192       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.626        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 108          |
|    time_elapsed         | 669          |
|    total_timesteps      | 110592       |
| train/                  |              |
|    approx_kl            | 0.0010722085 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.217       |
|    explained_variance   | 0.00082      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00443      |
|    n_updates            | 1070         |
|    policy_gradient_loss | -0.00282     |
|    reward               | 0.049539883  |
|    value_loss           | 0.022        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.634         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 109           |
|    time_elapsed         | 675           |
|    total_timesteps      | 111616        |
| train/                  |               |
|    approx_kl            | 0.00032409362 |
|    clip_fraction        | 0.0118        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.243        |
|    explained_variance   | 0.00161       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0081        |
|    n_updates            | 1080          |
|    policy_gradient_loss | 0.000637      |
|    reward               | -0.2239277    |
|    value_loss           | 0.0217        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.641        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 110          |
|    time_elapsed         | 681          |
|    total_timesteps      | 112640       |
| train/                  |              |
|    approx_kl            | 0.0009324334 |
|    clip_fraction        | 0.0197       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.204       |
|    explained_variance   | 0.00139      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00651     |
|    n_updates            | 1090         |
|    policy_gradient_loss | -0.00328     |
|    reward               | 0.0046973783 |
|    value_loss           | 0.0206       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 111         |
|    time_elapsed         | 687         |
|    total_timesteps      | 113664      |
| train/                  |             |
|    approx_kl            | 0.001326822 |
|    clip_fraction        | 0.0138      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.187      |
|    explained_variance   | 0.000206    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.016       |
|    n_updates            | 1100        |
|    policy_gradient_loss | -0.00168    |
|    reward               | 0.0         |
|    value_loss           | 0.0241      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.647         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 112           |
|    time_elapsed         | 693           |
|    total_timesteps      | 114688        |
| train/                  |               |
|    approx_kl            | 0.00071054575 |
|    clip_fraction        | 0.00557       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.178        |
|    explained_variance   | 0.0011        |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00675       |
|    n_updates            | 1110          |
|    policy_gradient_loss | -0.000457     |
|    reward               | 0.0           |
|    value_loss           | 0.0179        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.658        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 113          |
|    time_elapsed         | 700          |
|    total_timesteps      | 115712       |
| train/                  |              |
|    approx_kl            | 0.0006361976 |
|    clip_fraction        | 0.0155       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.201       |
|    explained_variance   | 0.00204      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0131       |
|    n_updates            | 1120         |
|    policy_gradient_loss | -0.000192    |
|    reward               | 0.0044387677 |
|    value_loss           | 0.0348       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.667        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 114          |
|    time_elapsed         | 706          |
|    total_timesteps      | 116736       |
| train/                  |              |
|    approx_kl            | 0.0015424872 |
|    clip_fraction        | 0.0258       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.000501     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0121       |
|    n_updates            | 1130         |
|    policy_gradient_loss | -0.00104     |
|    reward               | 0.026704118  |
|    value_loss           | 0.0266       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.673         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 115           |
|    time_elapsed         | 712           |
|    total_timesteps      | 117760        |
| train/                  |               |
|    approx_kl            | 0.00035908417 |
|    clip_fraction        | 0.0108        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.264        |
|    explained_variance   | 0.000422      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000113     |
|    n_updates            | 1140          |
|    policy_gradient_loss | 0.000384      |
|    reward               | -0.03052181   |
|    value_loss           | 0.0222        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 116          |
|    time_elapsed         | 718          |
|    total_timesteps      | 118784       |
| train/                  |              |
|    approx_kl            | 0.0005875459 |
|    clip_fraction        | 0.00215      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.269       |
|    explained_variance   | 6.74e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0048       |
|    n_updates            | 1150         |
|    policy_gradient_loss | 0.000966     |
|    reward               | 0.012517589  |
|    value_loss           | 0.0239       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 117          |
|    time_elapsed         | 724          |
|    total_timesteps      | 119808       |
| train/                  |              |
|    approx_kl            | 0.0007819202 |
|    clip_fraction        | 0.00371      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.282       |
|    explained_variance   | 0.001        |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000397     |
|    n_updates            | 1160         |
|    policy_gradient_loss | 0.000122     |
|    reward               | 0.015196093  |
|    value_loss           | 0.0211       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.696        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 118          |
|    time_elapsed         | 730          |
|    total_timesteps      | 120832       |
| train/                  |              |
|    approx_kl            | 0.0013680214 |
|    clip_fraction        | 0.0198       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.252       |
|    explained_variance   | 0.000658     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0122      |
|    n_updates            | 1170         |
|    policy_gradient_loss | -0.00348     |
|    reward               | 0.05949247   |
|    value_loss           | 0.0183       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.7          |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 119          |
|    time_elapsed         | 737          |
|    total_timesteps      | 121856       |
| train/                  |              |
|    approx_kl            | 0.0010050095 |
|    clip_fraction        | 0.0194       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.235       |
|    explained_variance   | -2.62e-06    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00263     |
|    n_updates            | 1180         |
|    policy_gradient_loss | -0.00173     |
|    reward               | -0.04946387  |
|    value_loss           | 0.0222       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.706        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 120          |
|    time_elapsed         | 743          |
|    total_timesteps      | 122880       |
| train/                  |              |
|    approx_kl            | 0.0008585022 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.0004       |
|    learning_rate        | 0.0002       |
|    loss                 | -0.01        |
|    n_updates            | 1190         |
|    policy_gradient_loss | -0.00202     |
|    reward               | -0.008826301 |
|    value_loss           | 0.0199       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.715        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 122          |
|    time_elapsed         | 755          |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0010123025 |
|    clip_fraction        | 0.0188       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.000649     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00853      |
|    n_updates            | 1210         |
|    policy_gradient_loss | -0.00319     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0223       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.726       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 123         |
|    time_elapsed         | 761         |
|    total_timesteps      | 125952      |
| train/                  |             |
|    approx_kl            | 0.001098353 |
|    clip_fraction        | 0.0151      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.191      |
|    explained_variance   | 0.000555    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00316     |
|    n_updates            | 1220        |
|    policy_gradient_loss | -0.00233    |
|    reward               | -0.06554608 |
|    value_loss           | 0.0204      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.716         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 124           |
|    time_elapsed         | 767           |
|    total_timesteps      | 126976        |
| train/                  |               |
|    approx_kl            | 0.00018623477 |
|    clip_fraction        | 0.00654       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.209        |
|    explained_variance   | 0.00121       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0102        |
|    n_updates            | 1230          |
|    policy_gradient_loss | 0.000947      |
|    reward               | 0.06697041    |
|    value_loss           | 0.0367        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.704         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 125           |
|    time_elapsed         | 773           |
|    total_timesteps      | 128000        |
| train/                  |               |
|    approx_kl            | 0.00055783836 |
|    clip_fraction        | 0.00527       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.212        |
|    explained_variance   | 0.000564      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0212        |
|    n_updates            | 1240          |
|    policy_gradient_loss | 0.000157      |
|    reward               | 0.0           |
|    value_loss           | 0.0282        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.699       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 126         |
|    time_elapsed         | 780         |
|    total_timesteps      | 129024      |
| train/                  |             |
|    approx_kl            | 0.001316329 |
|    clip_fraction        | 0.00576     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.211      |
|    explained_variance   | 0.000846    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.015      |
|    n_updates            | 1250        |
|    policy_gradient_loss | 0.000513    |
|    reward               | 0.01271993  |
|    value_loss           | 0.025       |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.702        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 127          |
|    time_elapsed         | 786          |
|    total_timesteps      | 130048       |
| train/                  |              |
|    approx_kl            | 0.0006167332 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.244       |
|    explained_variance   | 3.98e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00297      |
|    n_updates            | 1260         |
|    policy_gradient_loss | -0.00119     |
|    reward               | 0.0          |
|    value_loss           | 0.0244       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.707         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 128           |
|    time_elapsed         | 793           |
|    total_timesteps      | 131072        |
| train/                  |               |
|    approx_kl            | 0.0014432155  |
|    clip_fraction        | 0.0223        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.212        |
|    explained_variance   | 0.000502      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00541       |
|    n_updates            | 1270          |
|    policy_gradient_loss | -0.00347      |
|    reward               | 0.00030010505 |
|    value_loss           | 0.0242        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.708        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 129          |
|    time_elapsed         | 799          |
|    total_timesteps      | 132096       |
| train/                  |              |
|    approx_kl            | 0.0009959919 |
|    clip_fraction        | 0.0219       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.000104     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000752     |
|    n_updates            | 1280         |
|    policy_gradient_loss | -0.00245     |
|    reward               | -0.050319802 |
|    value_loss           | 0.0228       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.716        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 130          |
|    time_elapsed         | 805          |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 0.0007792427 |
|    clip_fraction        | 0.0111       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.208       |
|    explained_variance   | 0.000419     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0128      |
|    n_updates            | 1290         |
|    policy_gradient_loss | -0.000887    |
|    reward               | -0.050925076 |
|    value_loss           | 0.0276       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 131           |
|    time_elapsed         | 811           |
|    total_timesteps      | 134144        |
| train/                  |               |
|    approx_kl            | 0.00012527447 |
|    clip_fraction        | 0.00918       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.232        |
|    explained_variance   | 0.00268       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00583       |
|    n_updates            | 1300          |
|    policy_gradient_loss | 0.000296      |
|    reward               | 0.016545659   |
|    value_loss           | 0.0216        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.716         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 132           |
|    time_elapsed         | 818           |
|    total_timesteps      | 135168        |
| train/                  |               |
|    approx_kl            | 0.00080684596 |
|    clip_fraction        | 0.0082        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.197        |
|    explained_variance   | 0.00147       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00527       |
|    n_updates            | 1310          |
|    policy_gradient_loss | -0.00189      |
|    reward               | 0.0           |
|    value_loss           | 0.0273        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.71         |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 133          |
|    time_elapsed         | 824          |
|    total_timesteps      | 136192       |
| train/                  |              |
|    approx_kl            | 0.000559154  |
|    clip_fraction        | 0.0212       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.223       |
|    explained_variance   | 0.00167      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00124      |
|    n_updates            | 1320         |
|    policy_gradient_loss | -0.000916    |
|    reward               | 0.0022571804 |
|    value_loss           | 0.0263       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.703         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 134           |
|    time_elapsed         | 830           |
|    total_timesteps      | 137216        |
| train/                  |               |
|    approx_kl            | 0.00033156085 |
|    clip_fraction        | 0.0135        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.247        |
|    explained_variance   | -0.00569      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000284      |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000305     |
|    reward               | 0.0           |
|    value_loss           | 0.0234        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.697        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 135          |
|    time_elapsed         | 837          |
|    total_timesteps      | 138240       |
| train/                  |              |
|    approx_kl            | 0.0005348253 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.278       |
|    explained_variance   | 0.000203     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00781      |
|    n_updates            | 1340         |
|    policy_gradient_loss | 0.000856     |
|    reward               | 0.0027769401 |
|    value_loss           | 0.0243       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.692        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 136          |
|    time_elapsed         | 843          |
|    total_timesteps      | 139264       |
| train/                  |              |
|    approx_kl            | 0.0009444321 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.277       |
|    explained_variance   | 0.000996     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0112       |
|    n_updates            | 1350         |
|    policy_gradient_loss | -0.00136     |
|    reward               | -0.035490144 |
|    value_loss           | 0.0215       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.687         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 137           |
|    time_elapsed         | 849           |
|    total_timesteps      | 140288        |
| train/                  |               |
|    approx_kl            | 0.00090859557 |
|    clip_fraction        | 0.00977       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.237        |
|    explained_variance   | 0.00146       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00558      |
|    n_updates            | 1360          |
|    policy_gradient_loss | -0.0021       |
|    reward               | 0.0           |
|    value_loss           | 0.0263        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.681        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 138          |
|    time_elapsed         | 855          |
|    total_timesteps      | 141312       |
| train/                  |              |
|    approx_kl            | 0.0012006457 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | -9.69e-05    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00567      |
|    n_updates            | 1370         |
|    policy_gradient_loss | -0.000917    |
|    reward               | 0.0          |
|    value_loss           | 0.0206       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.668        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 139          |
|    time_elapsed         | 862          |
|    total_timesteps      | 142336       |
| train/                  |              |
|    approx_kl            | 0.0012171773 |
|    clip_fraction        | 0.0278       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.234       |
|    explained_variance   | 0.000431     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0139       |
|    n_updates            | 1380         |
|    policy_gradient_loss | -0.00304     |
|    reward               | 0.040752143  |
|    value_loss           | 0.0215       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.673         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 140           |
|    time_elapsed         | 868           |
|    total_timesteps      | 143360        |
| train/                  |               |
|    approx_kl            | 0.00056788913 |
|    clip_fraction        | 0.00947       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.249        |
|    explained_variance   | 0.00039       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.018         |
|    n_updates            | 1390          |
|    policy_gradient_loss | 0.000186      |
|    reward               | 0.01930536    |
|    value_loss           | 0.0315        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.675         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 141           |
|    time_elapsed         | 874           |
|    total_timesteps      | 144384        |
| train/                  |               |
|    approx_kl            | 0.00028799177 |
|    clip_fraction        | 0.00342       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.222        |
|    explained_variance   | 0.000925      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0109        |
|    n_updates            | 1400          |
|    policy_gradient_loss | -0.000316     |
|    reward               | 0.0           |
|    value_loss           | 0.0274        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.685       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 142         |
|    time_elapsed         | 881         |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 5.92203e-05 |
|    clip_fraction        | 0.00342     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.219      |
|    explained_variance   | 0.00261     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00197     |
|    n_updates            | 1410        |
|    policy_gradient_loss | -7.83e-05   |
|    reward               | 0.0         |
|    value_loss           | 0.0258      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.677        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 143          |
|    time_elapsed         | 887          |
|    total_timesteps      | 146432       |
| train/                  |              |
|    approx_kl            | 0.0007257724 |
|    clip_fraction        | 0.00664      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.208       |
|    explained_variance   | 0.000525     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000943    |
|    n_updates            | 1420         |
|    policy_gradient_loss | -0.00135     |
|    reward               | 0.07168503   |
|    value_loss           | 0.0326       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.675        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 144          |
|    time_elapsed         | 893          |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0007569877 |
|    clip_fraction        | 0.00713      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.196       |
|    explained_variance   | 0.000111     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00465     |
|    n_updates            | 1430         |
|    policy_gradient_loss | -0.00122     |
|    reward               | 0.0056414255 |
|    value_loss           | 0.0294       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.67          |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 145           |
|    time_elapsed         | 899           |
|    total_timesteps      | 148480        |
| train/                  |               |
|    approx_kl            | 0.00040494418 |
|    clip_fraction        | 0.00459       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.204        |
|    explained_variance   | 0.000378      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00485       |
|    n_updates            | 1440          |
|    policy_gradient_loss | 0.000492      |
|    reward               | 0.0           |
|    value_loss           | 0.0302        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.669        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 146          |
|    time_elapsed         | 905          |
|    total_timesteps      | 149504       |
| train/                  |              |
|    approx_kl            | 0.0003857817 |
|    clip_fraction        | 0.0122       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.000217     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00784      |
|    n_updates            | 1450         |
|    policy_gradient_loss | 0.000279     |
|    reward               | 0.0          |
|    value_loss           | 0.0255       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.669         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 147           |
|    time_elapsed         | 912           |
|    total_timesteps      | 150528        |
| train/                  |               |
|    approx_kl            | 0.00092402106 |
|    clip_fraction        | 0.018         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.22         |
|    explained_variance   | 0.000632      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0103        |
|    n_updates            | 1460          |
|    policy_gradient_loss | -0.00257      |
|    reward               | 0.016707622   |
|    value_loss           | 0.0279        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.677        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 148          |
|    time_elapsed         | 918          |
|    total_timesteps      | 151552       |
| train/                  |              |
|    approx_kl            | 0.0009636518 |
|    clip_fraction        | 0.0137       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | 0.000437     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00471      |
|    n_updates            | 1470         |
|    policy_gradient_loss | -0.00101     |
|    reward               | 0.0          |
|    value_loss           | 0.0277       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.675        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 149          |
|    time_elapsed         | 924          |
|    total_timesteps      | 152576       |
| train/                  |              |
|    approx_kl            | 0.0003792529 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.228       |
|    explained_variance   | 0.00173      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00933      |
|    n_updates            | 1480         |
|    policy_gradient_loss | 4.15e-06     |
|    reward               | -0.037158743 |
|    value_loss           | 0.0292       |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 0.675      |
| time/                   |            |
|    fps                  | 165        |
|    iterations           | 150        |
|    time_elapsed         | 929        |
|    total_timesteps      | 153600     |
| train/                  |            |
|    approx_kl            | 0.00115191 |
|    clip_fraction        | 0.0263     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.253     |
|    explained_variance   | 0.000304   |
|    learning_rate        | 0.0002     |
|    loss                 | -0.0118    |
|    n_updates            | 1490       |
|    policy_gradient_loss | -0.000815  |
|    reward               | 0.0        |
|    value_loss           | 0.0299     |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.666        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 151          |
|    time_elapsed         | 935          |
|    total_timesteps      | 154624       |
| train/                  |              |
|    approx_kl            | 0.0010654179 |
|    clip_fraction        | 0.0145       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.00033      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00637     |
|    n_updates            | 1500         |
|    policy_gradient_loss | -0.00129     |
|    reward               | 0.024352083  |
|    value_loss           | 0.0171       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.662        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 152          |
|    time_elapsed         | 941          |
|    total_timesteps      | 155648       |
| train/                  |              |
|    approx_kl            | 0.0015257148 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.00363      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00107      |
|    n_updates            | 1510         |
|    policy_gradient_loss | -0.00221     |
|    reward               | 0.0          |
|    value_loss           | 0.0242       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.657         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 153           |
|    time_elapsed         | 948           |
|    total_timesteps      | 156672        |
| train/                  |               |
|    approx_kl            | 0.00058330875 |
|    clip_fraction        | 0.00732       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.215        |
|    explained_variance   | 0.00167       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00209       |
|    n_updates            | 1520          |
|    policy_gradient_loss | 0.000115      |
|    reward               | 0.0           |
|    value_loss           | 0.026         |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 154         |
|    time_elapsed         | 954         |
|    total_timesteps      | 157696      |
| train/                  |             |
|    approx_kl            | 0.001301109 |
|    clip_fraction        | 0.0238      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.188      |
|    explained_variance   | 0.000663    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00979     |
|    n_updates            | 1530        |
|    policy_gradient_loss | -0.0038     |
|    reward               | 0.03192306  |
|    value_loss           | 0.025       |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.659        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 155          |
|    time_elapsed         | 959          |
|    total_timesteps      | 158720       |
| train/                  |              |
|    approx_kl            | 0.0009988445 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.228       |
|    explained_variance   | 0.000287     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00956      |
|    n_updates            | 1540         |
|    policy_gradient_loss | -0.000512    |
|    reward               | 0.016993264  |
|    value_loss           | 0.0262       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.659        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 156          |
|    time_elapsed         | 965          |
|    total_timesteps      | 159744       |
| train/                  |              |
|    approx_kl            | 0.0007856978 |
|    clip_fraction        | 0.0143       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.248       |
|    explained_variance   | 0.00092      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0086       |
|    n_updates            | 1550         |
|    policy_gradient_loss | 0.000411     |
|    reward               | -0.033685897 |
|    value_loss           | 0.04         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.655        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 157          |
|    time_elapsed         | 971          |
|    total_timesteps      | 160768       |
| train/                  |              |
|    approx_kl            | 0.0012817776 |
|    clip_fraction        | 0.0227       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | 0.000309     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00402      |
|    n_updates            | 1560         |
|    policy_gradient_loss | -0.00173     |
|    reward               | 0.03862973   |
|    value_loss           | 0.019        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.653        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 158          |
|    time_elapsed         | 977          |
|    total_timesteps      | 161792       |
| train/                  |              |
|    approx_kl            | 0.0007765395 |
|    clip_fraction        | 0.0104       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.00115      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000735    |
|    n_updates            | 1570         |
|    policy_gradient_loss | -0.000942    |
|    reward               | 0.0          |
|    value_loss           | 0.0203       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.645        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 160          |
|    time_elapsed         | 989          |
|    total_timesteps      | 163840       |
| train/                  |              |
|    approx_kl            | 0.0007436085 |
|    clip_fraction        | 0.00361      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.245       |
|    explained_variance   | 0.00105      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00793      |
|    n_updates            | 1590         |
|    policy_gradient_loss | 0.000561     |
|    reward               | 0.018250244  |
|    value_loss           | 0.0248       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.636        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 161          |
|    time_elapsed         | 995          |
|    total_timesteps      | 164864       |
| train/                  |              |
|    approx_kl            | 0.0008193923 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.225       |
|    explained_variance   | 0.000632     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00257      |
|    n_updates            | 1600         |
|    policy_gradient_loss | -0.00262     |
|    reward               | -0.05476377  |
|    value_loss           | 0.0315       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.647         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 162           |
|    time_elapsed         | 1001          |
|    total_timesteps      | 165888        |
| train/                  |               |
|    approx_kl            | 0.00060856936 |
|    clip_fraction        | 0.00498       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.212        |
|    explained_variance   | 0.000817      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00435       |
|    n_updates            | 1610          |
|    policy_gradient_loss | -0.000594     |
|    reward               | 0.013905412   |
|    value_loss           | 0.0185        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.653        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 163          |
|    time_elapsed         | 1008         |
|    total_timesteps      | 166912       |
| train/                  |              |
|    approx_kl            | 0.0009643993 |
|    clip_fraction        | 0.00342      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.00085      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0153       |
|    n_updates            | 1620         |
|    policy_gradient_loss | 3.52e-05     |
|    reward               | 0.03708564   |
|    value_loss           | 0.0214       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.654         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 164           |
|    time_elapsed         | 1014          |
|    total_timesteps      | 167936        |
| train/                  |               |
|    approx_kl            | 0.00030949892 |
|    clip_fraction        | 0.000879      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.211        |
|    explained_variance   | 0.00201       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000695      |
|    n_updates            | 1630          |
|    policy_gradient_loss | 7.46e-05      |
|    reward               | 0.0           |
|    value_loss           | 0.0291        |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1e+03          |
|    ep_rew_mean          | 0.655          |
| time/                   |                |
|    fps                  | 165            |
|    iterations           | 165            |
|    time_elapsed         | 1020           |
|    total_timesteps      | 168960         |
| train/                  |                |
|    approx_kl            | 0.0010334083   |
|    clip_fraction        | 0.0207         |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.237         |
|    explained_variance   | -0.000317      |
|    learning_rate        | 0.0002         |
|    loss                 | 0.00625        |
|    n_updates            | 1640           |
|    policy_gradient_loss | -0.0013        |
|    reward               | -9.1170594e-05 |
|    value_loss           | 0.0237         |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.653         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 166           |
|    time_elapsed         | 1026          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00034103642 |
|    clip_fraction        | 0.00479       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | 0.000285      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00549       |
|    n_updates            | 1650          |
|    policy_gradient_loss | -0.000343     |
|    reward               | -0.030525353  |
|    value_loss           | 0.0238        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.645        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 167          |
|    time_elapsed         | 1032         |
|    total_timesteps      | 171008       |
| train/                  |              |
|    approx_kl            | 0.0015391544 |
|    clip_fraction        | 0.0186       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.00168      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00403      |
|    n_updates            | 1660         |
|    policy_gradient_loss | -0.00123     |
|    reward               | 0.0078891395 |
|    value_loss           | 0.0224       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.637        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 168          |
|    time_elapsed         | 1038         |
|    total_timesteps      | 172032       |
| train/                  |              |
|    approx_kl            | 0.0009821065 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | 0.00102      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000755     |
|    n_updates            | 1670         |
|    policy_gradient_loss | -0.00313     |
|    reward               | 0.0          |
|    value_loss           | 0.0226       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.632        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 169          |
|    time_elapsed         | 1044         |
|    total_timesteps      | 173056       |
| train/                  |              |
|    approx_kl            | 0.0010001545 |
|    clip_fraction        | 0.0123       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | 0.00255      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0113       |
|    n_updates            | 1680         |
|    policy_gradient_loss | -0.000708    |
|    reward               | 0.0          |
|    value_loss           | 0.0303       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.641         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 171           |
|    time_elapsed         | 1056          |
|    total_timesteps      | 175104        |
| train/                  |               |
|    approx_kl            | 0.00031134865 |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.24         |
|    explained_variance   | 0.00357       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.002        |
|    n_updates            | 1700          |
|    policy_gradient_loss | 0.000565      |
|    reward               | 0.0023189315  |
|    value_loss           | 0.0216        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.645         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 172           |
|    time_elapsed         | 1062          |
|    total_timesteps      | 176128        |
| train/                  |               |
|    approx_kl            | 0.00079887384 |
|    clip_fraction        | 0.0169        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.242        |
|    explained_variance   | -0.00448      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00469      |
|    n_updates            | 1710          |
|    policy_gradient_loss | -0.00221      |
|    reward               | -0.14096433   |
|    value_loss           | 0.0226        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.657        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 173          |
|    time_elapsed         | 1068         |
|    total_timesteps      | 177152       |
| train/                  |              |
|    approx_kl            | 0.0009802743 |
|    clip_fraction        | 0.0124       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | 0.00113      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00467     |
|    n_updates            | 1720         |
|    policy_gradient_loss | -0.00224     |
|    reward               | 0.010099582  |
|    value_loss           | 0.0161       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.658        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 174          |
|    time_elapsed         | 1074         |
|    total_timesteps      | 178176       |
| train/                  |              |
|    approx_kl            | 0.00103006   |
|    clip_fraction        | 0.013        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.181       |
|    explained_variance   | 0.000183     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0124       |
|    n_updates            | 1730         |
|    policy_gradient_loss | -0.00248     |
|    reward               | -0.005096023 |
|    value_loss           | 0.0181       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.655       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 175         |
|    time_elapsed         | 1080        |
|    total_timesteps      | 179200      |
| train/                  |             |
|    approx_kl            | 0.000607444 |
|    clip_fraction        | 0.0101      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.154      |
|    explained_variance   | 0.000729    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00153     |
|    n_updates            | 1740        |
|    policy_gradient_loss | -0.00154    |
|    reward               | 0.032391433 |
|    value_loss           | 0.0219      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.654         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 177           |
|    time_elapsed         | 1092          |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 0.00057426305 |
|    clip_fraction        | 0.00645       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.146        |
|    explained_variance   | 0.00145       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0156        |
|    n_updates            | 1760          |
|    policy_gradient_loss | -0.0013       |
|    reward               | -0.0645958    |
|    value_loss           | 0.027         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.656         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 178           |
|    time_elapsed         | 1098          |
|    total_timesteps      | 182272        |
| train/                  |               |
|    approx_kl            | 0.00070981786 |
|    clip_fraction        | 0.0158        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.169        |
|    explained_variance   | 0.000974      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00674       |
|    n_updates            | 1770          |
|    policy_gradient_loss | 0.000787      |
|    reward               | 0.0           |
|    value_loss           | 0.0353        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.659        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 179          |
|    time_elapsed         | 1104         |
|    total_timesteps      | 183296       |
| train/                  |              |
|    approx_kl            | 0.0004811883 |
|    clip_fraction        | 0.00957      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.174       |
|    explained_variance   | 0.000715     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00199      |
|    n_updates            | 1780         |
|    policy_gradient_loss | 0.000452     |
|    reward               | 0.0          |
|    value_loss           | 0.0275       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.663         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 180           |
|    time_elapsed         | 1110          |
|    total_timesteps      | 184320        |
| train/                  |               |
|    approx_kl            | 0.00059324905 |
|    clip_fraction        | 0.0145        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.202        |
|    explained_variance   | 0.000396      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00688       |
|    n_updates            | 1790          |
|    policy_gradient_loss | -0.000411     |
|    reward               | 0.013664023   |
|    value_loss           | 0.0283        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.662         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 181           |
|    time_elapsed         | 1116          |
|    total_timesteps      | 185344        |
| train/                  |               |
|    approx_kl            | 0.00062299817 |
|    clip_fraction        | 0.00166       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.188        |
|    explained_variance   | 0.000231      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0234        |
|    n_updates            | 1800          |
|    policy_gradient_loss | -7.19e-05     |
|    reward               | 0.026551548   |
|    value_loss           | 0.0298        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.669         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 182           |
|    time_elapsed         | 1122          |
|    total_timesteps      | 186368        |
| train/                  |               |
|    approx_kl            | 0.00058002415 |
|    clip_fraction        | 0.0156        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.175        |
|    explained_variance   | 0.000434      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00947       |
|    n_updates            | 1810          |
|    policy_gradient_loss | -0.00152      |
|    reward               | -0.067680605  |
|    value_loss           | 0.0286        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.669        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 183          |
|    time_elapsed         | 1127         |
|    total_timesteps      | 187392       |
| train/                  |              |
|    approx_kl            | 0.0007538213 |
|    clip_fraction        | 0.0102       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.183       |
|    explained_variance   | 0.0016       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0153       |
|    n_updates            | 1820         |
|    policy_gradient_loss | -0.000181    |
|    reward               | -0.19894662  |
|    value_loss           | 0.0281       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.673         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 184           |
|    time_elapsed         | 1133          |
|    total_timesteps      | 188416        |
| train/                  |               |
|    approx_kl            | 0.00067573955 |
|    clip_fraction        | 0.0126        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.202        |
|    explained_variance   | 0.00118       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0101        |
|    n_updates            | 1830          |
|    policy_gradient_loss | 0.000407      |
|    reward               | 0.046029136   |
|    value_loss           | 0.023         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.682        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 186          |
|    time_elapsed         | 1145         |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 0.0007356257 |
|    clip_fraction        | 0.0182       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.222       |
|    explained_variance   | 0.000567     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0062       |
|    n_updates            | 1850         |
|    policy_gradient_loss | -5.83e-05    |
|    reward               | 0.00800575   |
|    value_loss           | 0.0275       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.687        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 187          |
|    time_elapsed         | 1151         |
|    total_timesteps      | 191488       |
| train/                  |              |
|    approx_kl            | 0.0008035028 |
|    clip_fraction        | 0.0182       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.189       |
|    explained_variance   | 0.000659     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00528      |
|    n_updates            | 1860         |
|    policy_gradient_loss | -0.00399     |
|    reward               | -0.059939936 |
|    value_loss           | 0.0304       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.682         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 188           |
|    time_elapsed         | 1157          |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 0.00054796785 |
|    clip_fraction        | 0.00586       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.199        |
|    explained_variance   | 0.00165       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00315       |
|    n_updates            | 1870          |
|    policy_gradient_loss | -0.000128     |
|    reward               | 0.0           |
|    value_loss           | 0.027         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.684         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 189           |
|    time_elapsed         | 1163          |
|    total_timesteps      | 193536        |
| train/                  |               |
|    approx_kl            | 0.00079320057 |
|    clip_fraction        | 0.0108        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.189        |
|    explained_variance   | 0.000774      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000225     |
|    n_updates            | 1880          |
|    policy_gradient_loss | -0.000543     |
|    reward               | 0.024589056   |
|    value_loss           | 0.0325        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.688         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 190           |
|    time_elapsed         | 1169          |
|    total_timesteps      | 194560        |
| train/                  |               |
|    approx_kl            | 0.00072059507 |
|    clip_fraction        | 0.00459       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.161        |
|    explained_variance   | 0.00125       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0148        |
|    n_updates            | 1890          |
|    policy_gradient_loss | -0.000986     |
|    reward               | 0.004275623   |
|    value_loss           | 0.0285        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.689       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 191         |
|    time_elapsed         | 1175        |
|    total_timesteps      | 195584      |
| train/                  |             |
|    approx_kl            | 0.000736668 |
|    clip_fraction        | 0.0125      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.157      |
|    explained_variance   | 0.000647    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00326     |
|    n_updates            | 1900        |
|    policy_gradient_loss | -0.00129    |
|    reward               | 0.018851802 |
|    value_loss           | 0.0255      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.689        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 192          |
|    time_elapsed         | 1181         |
|    total_timesteps      | 196608       |
| train/                  |              |
|    approx_kl            | 0.0009170406 |
|    clip_fraction        | 0.0142       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.165       |
|    explained_variance   | 0.000733     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.008        |
|    n_updates            | 1910         |
|    policy_gradient_loss | -0.00208     |
|    reward               | 0.0          |
|    value_loss           | 0.0313       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.69         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 193          |
|    time_elapsed         | 1188         |
|    total_timesteps      | 197632       |
| train/                  |              |
|    approx_kl            | 0.0010268838 |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.173       |
|    explained_variance   | 0.000675     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00639      |
|    n_updates            | 1920         |
|    policy_gradient_loss | -0.0012      |
|    reward               | 0.0          |
|    value_loss           | 0.0322       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.699         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 194           |
|    time_elapsed         | 1193          |
|    total_timesteps      | 198656        |
| train/                  |               |
|    approx_kl            | 0.00044876517 |
|    clip_fraction        | 0.00664       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.191        |
|    explained_variance   | -8.56e-05     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00396       |
|    n_updates            | 1930          |
|    policy_gradient_loss | 0.000575      |
|    reward               | -0.006336782  |
|    value_loss           | 0.0223        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.708         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 195           |
|    time_elapsed         | 1199          |
|    total_timesteps      | 199680        |
| train/                  |               |
|    approx_kl            | 0.00055719906 |
|    clip_fraction        | 0.0167        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | 0.00077       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00461      |
|    n_updates            | 1940          |
|    policy_gradient_loss | -0.00123      |
|    reward               | 0.0           |
|    value_loss           | 0.021         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.699         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 196           |
|    time_elapsed         | 1205          |
|    total_timesteps      | 200704        |
| train/                  |               |
|    approx_kl            | 0.00071947125 |
|    clip_fraction        | 0.0173        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.208        |
|    explained_variance   | 0.00101       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00092       |
|    n_updates            | 1950          |
|    policy_gradient_loss | -0.00123      |
|    reward               | 0.0           |
|    value_loss           | 0.0232        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.7          |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 197          |
|    time_elapsed         | 1211         |
|    total_timesteps      | 201728       |
| train/                  |              |
|    approx_kl            | 0.0006583774 |
|    clip_fraction        | 0.0085       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.000948     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0116       |
|    n_updates            | 1960         |
|    policy_gradient_loss | 0.000644     |
|    reward               | 0.0          |
|    value_loss           | 0.0241       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.702       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 198         |
|    time_elapsed         | 1217        |
|    total_timesteps      | 202752      |
| train/                  |             |
|    approx_kl            | 0.001323092 |
|    clip_fraction        | 0.0118      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.252      |
|    explained_variance   | 0.000907    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00205     |
|    n_updates            | 1970        |
|    policy_gradient_loss | -0.000362   |
|    reward               | 0.025118887 |
|    value_loss           | 0.0227      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.703         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 199           |
|    time_elapsed         | 1223          |
|    total_timesteps      | 203776        |
| train/                  |               |
|    approx_kl            | 0.00045906485 |
|    clip_fraction        | 0.0161        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.229        |
|    explained_variance   | 0.000671      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00155       |
|    n_updates            | 1980          |
|    policy_gradient_loss | -0.00268      |
|    reward               | 0.0           |
|    value_loss           | 0.0252        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.704        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 200          |
|    time_elapsed         | 1228         |
|    total_timesteps      | 204800       |
| train/                  |              |
|    approx_kl            | 0.0011020754 |
|    clip_fraction        | 0.0107       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.21        |
|    explained_variance   | 0.00067      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00774     |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.00264     |
|    reward               | 0.0          |
|    value_loss           | 0.0208       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.705         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 202           |
|    time_elapsed         | 1241          |
|    total_timesteps      | 206848        |
| train/                  |               |
|    approx_kl            | 0.00062301935 |
|    clip_fraction        | 0.00371       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.251        |
|    explained_variance   | 0.00244       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0066        |
|    n_updates            | 2010          |
|    policy_gradient_loss | 0.000394      |
|    reward               | 0.0029411486  |
|    value_loss           | 0.0266        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.715        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 203          |
|    time_elapsed         | 1247         |
|    total_timesteps      | 207872       |
| train/                  |              |
|    approx_kl            | 0.0013551197 |
|    clip_fraction        | 0.0189       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.215       |
|    explained_variance   | 0.000426     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00586      |
|    n_updates            | 2020         |
|    policy_gradient_loss | -0.00313     |
|    reward               | 0.0          |
|    value_loss           | 0.0178       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.715         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 204           |
|    time_elapsed         | 1254          |
|    total_timesteps      | 208896        |
| train/                  |               |
|    approx_kl            | 0.00026604492 |
|    clip_fraction        | 0.00771       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.21         |
|    explained_variance   | 0.000438      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00875       |
|    n_updates            | 2030          |
|    policy_gradient_loss | -0.000503     |
|    reward               | 0.030730313   |
|    value_loss           | 0.026         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.723        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 205          |
|    time_elapsed         | 1260         |
|    total_timesteps      | 209920       |
| train/                  |              |
|    approx_kl            | 0.0008608804 |
|    clip_fraction        | 0.0115       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.216       |
|    explained_variance   | 0.000405     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00362      |
|    n_updates            | 2040         |
|    policy_gradient_loss | -0.000526    |
|    reward               | 0.032989934  |
|    value_loss           | 0.0226       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.73          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 206           |
|    time_elapsed         | 1268          |
|    total_timesteps      | 210944        |
| train/                  |               |
|    approx_kl            | 0.00084659585 |
|    clip_fraction        | 0.00879       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.184        |
|    explained_variance   | 0.00163       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0131        |
|    n_updates            | 2050          |
|    policy_gradient_loss | -0.00184      |
|    reward               | 0.013993747   |
|    value_loss           | 0.022         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.735         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 207           |
|    time_elapsed         | 1277          |
|    total_timesteps      | 211968        |
| train/                  |               |
|    approx_kl            | 0.00073529873 |
|    clip_fraction        | 0.0132        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.183        |
|    explained_variance   | 0.00101       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0067        |
|    n_updates            | 2060          |
|    policy_gradient_loss | -0.00232      |
|    reward               | 0.0           |
|    value_loss           | 0.0249        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.744        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 208          |
|    time_elapsed         | 1282         |
|    total_timesteps      | 212992       |
| train/                  |              |
|    approx_kl            | 0.0006999285 |
|    clip_fraction        | 0.00889      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.191       |
|    explained_variance   | 0.000486     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0116       |
|    n_updates            | 2070         |
|    policy_gradient_loss | -2.03e-05    |
|    reward               | 0.034275576  |
|    value_loss           | 0.0351       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.741        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 209          |
|    time_elapsed         | 1289         |
|    total_timesteps      | 214016       |
| train/                  |              |
|    approx_kl            | 0.0006574831 |
|    clip_fraction        | 0.00684      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.179       |
|    explained_variance   | 0.000915     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00251      |
|    n_updates            | 2080         |
|    policy_gradient_loss | -0.00109     |
|    reward               | -0.03201325  |
|    value_loss           | 0.0264       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.75         |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 210          |
|    time_elapsed         | 1295         |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0002915939 |
|    clip_fraction        | 0.0104       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.187       |
|    explained_variance   | 0.000707     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.013        |
|    n_updates            | 2090         |
|    policy_gradient_loss | -0.000403    |
|    reward               | 0.006095278  |
|    value_loss           | 0.0243       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.753         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 211           |
|    time_elapsed         | 1302          |
|    total_timesteps      | 216064        |
| train/                  |               |
|    approx_kl            | 0.0008932884  |
|    clip_fraction        | 0.0139        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.17         |
|    explained_variance   | 0.00133       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00345       |
|    n_updates            | 2100          |
|    policy_gradient_loss | -0.00123      |
|    reward               | -0.0046250643 |
|    value_loss           | 0.0246        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.761         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 212           |
|    time_elapsed         | 1308          |
|    total_timesteps      | 217088        |
| train/                  |               |
|    approx_kl            | 0.00033741404 |
|    clip_fraction        | 0.00156       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.159        |
|    explained_variance   | 0.00263       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00205       |
|    n_updates            | 2110          |
|    policy_gradient_loss | -0.000348     |
|    reward               | 0.0           |
|    value_loss           | 0.0262        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.758        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 213          |
|    time_elapsed         | 1315         |
|    total_timesteps      | 218112       |
| train/                  |              |
|    approx_kl            | 0.0006398231 |
|    clip_fraction        | 0.016        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.159       |
|    explained_variance   | 0.00127      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00315      |
|    n_updates            | 2120         |
|    policy_gradient_loss | -0.0024      |
|    reward               | 0.0          |
|    value_loss           | 0.0206       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.761         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 215           |
|    time_elapsed         | 1326          |
|    total_timesteps      | 220160        |
| train/                  |               |
|    approx_kl            | 0.00091766263 |
|    clip_fraction        | 0.0209        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.207        |
|    explained_variance   | 0.000726      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00932       |
|    n_updates            | 2140          |
|    policy_gradient_loss | -0.000657     |
|    reward               | 0.0           |
|    value_loss           | 0.0263        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.762        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 216          |
|    time_elapsed         | 1332         |
|    total_timesteps      | 221184       |
| train/                  |              |
|    approx_kl            | 0.0001369362 |
|    clip_fraction        | 0.00371      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.00018      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000847     |
|    n_updates            | 2150         |
|    policy_gradient_loss | 0.000457     |
|    reward               | 0.0          |
|    value_loss           | 0.0291       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.765        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 217          |
|    time_elapsed         | 1338         |
|    total_timesteps      | 222208       |
| train/                  |              |
|    approx_kl            | 0.0010523428 |
|    clip_fraction        | 0.0134       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.244       |
|    explained_variance   | 0.00263      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0156       |
|    n_updates            | 2160         |
|    policy_gradient_loss | 0.00104      |
|    reward               | -0.00020002  |
|    value_loss           | 0.0266       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.772         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 219           |
|    time_elapsed         | 1350          |
|    total_timesteps      | 224256        |
| train/                  |               |
|    approx_kl            | 0.0010444236  |
|    clip_fraction        | 0.0162        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.269        |
|    explained_variance   | 0.00089       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0104        |
|    n_updates            | 2180          |
|    policy_gradient_loss | -0.000937     |
|    reward               | 0.00030010505 |
|    value_loss           | 0.024         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.778        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 220          |
|    time_elapsed         | 1357         |
|    total_timesteps      | 225280       |
| train/                  |              |
|    approx_kl            | 0.0009714379 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.276       |
|    explained_variance   | 0.000505     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000968    |
|    n_updates            | 2190         |
|    policy_gradient_loss | -0.000225    |
|    reward               | -0.21120153  |
|    value_loss           | 0.0238       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.786         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 221           |
|    time_elapsed         | 1362          |
|    total_timesteps      | 226304        |
| train/                  |               |
|    approx_kl            | 0.00037979055 |
|    clip_fraction        | 0.00566       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.248        |
|    explained_variance   | 0.000756      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00405       |
|    n_updates            | 2200          |
|    policy_gradient_loss | -0.00124      |
|    reward               | 0.0038213597  |
|    value_loss           | 0.0196        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.784         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 222           |
|    time_elapsed         | 1368          |
|    total_timesteps      | 227328        |
| train/                  |               |
|    approx_kl            | 0.00080351805 |
|    clip_fraction        | 0.00576       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.22         |
|    explained_variance   | 0.000155      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00348      |
|    n_updates            | 2210          |
|    policy_gradient_loss | -0.000854     |
|    reward               | 0.015620238   |
|    value_loss           | 0.0197        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 223          |
|    time_elapsed         | 1374         |
|    total_timesteps      | 228352       |
| train/                  |              |
|    approx_kl            | 0.0007053092 |
|    clip_fraction        | 0.00527      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.195       |
|    explained_variance   | 0.000193     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00834      |
|    n_updates            | 2220         |
|    policy_gradient_loss | -0.000997    |
|    reward               | 0.0076991217 |
|    value_loss           | 0.0226       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.801        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 224          |
|    time_elapsed         | 1380         |
|    total_timesteps      | 229376       |
| train/                  |              |
|    approx_kl            | 0.0008378888 |
|    clip_fraction        | 0.00723      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.166       |
|    explained_variance   | 0.000563     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00869      |
|    n_updates            | 2230         |
|    policy_gradient_loss | -0.0018      |
|    reward               | 0.0          |
|    value_loss           | 0.0233       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.807        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 225          |
|    time_elapsed         | 1386         |
|    total_timesteps      | 230400       |
| train/                  |              |
|    approx_kl            | 0.0005527921 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.168       |
|    explained_variance   | 0.000529     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0177       |
|    n_updates            | 2240         |
|    policy_gradient_loss | -0.00236     |
|    reward               | 0.037727866  |
|    value_loss           | 0.039        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.8           |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 226           |
|    time_elapsed         | 1392          |
|    total_timesteps      | 231424        |
| train/                  |               |
|    approx_kl            | 0.00061608164 |
|    clip_fraction        | 0.00889       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.179        |
|    explained_variance   | 0.000279      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0156        |
|    n_updates            | 2250          |
|    policy_gradient_loss | 0.000492      |
|    reward               | 0.0014976343  |
|    value_loss           | 0.0337        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.805         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 227           |
|    time_elapsed         | 1398          |
|    total_timesteps      | 232448        |
| train/                  |               |
|    approx_kl            | 0.00088515383 |
|    clip_fraction        | 0.0291        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.21         |
|    explained_variance   | 0.000308      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00356       |
|    n_updates            | 2260          |
|    policy_gradient_loss | -0.00237      |
|    reward               | -0.02244223   |
|    value_loss           | 0.0285        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.813         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 228           |
|    time_elapsed         | 1404          |
|    total_timesteps      | 233472        |
| train/                  |               |
|    approx_kl            | 0.00078239985 |
|    clip_fraction        | 0.0165        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.24         |
|    explained_variance   | 0.000432      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00262       |
|    n_updates            | 2270          |
|    policy_gradient_loss | 0.00045       |
|    reward               | 0.009534601   |
|    value_loss           | 0.0247        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.821        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 229          |
|    time_elapsed         | 1410         |
|    total_timesteps      | 234496       |
| train/                  |              |
|    approx_kl            | 0.0010156207 |
|    clip_fraction        | 0.0148       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.278       |
|    explained_variance   | 7.96e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000466    |
|    n_updates            | 2280         |
|    policy_gradient_loss | 0.000297     |
|    reward               | 0.0          |
|    value_loss           | 0.0186       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.816        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 230          |
|    time_elapsed         | 1416         |
|    total_timesteps      | 235520       |
| train/                  |              |
|    approx_kl            | 0.0007346868 |
|    clip_fraction        | 0.0184       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.317       |
|    explained_variance   | 0.000666     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00498      |
|    n_updates            | 2290         |
|    policy_gradient_loss | 0.000595     |
|    reward               | 0.031277474  |
|    value_loss           | 0.0234       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.816        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 231          |
|    time_elapsed         | 1423         |
|    total_timesteps      | 236544       |
| train/                  |              |
|    approx_kl            | 0.0011788718 |
|    clip_fraction        | 0.0123       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.309       |
|    explained_variance   | 0.00103      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00509     |
|    n_updates            | 2300         |
|    policy_gradient_loss | -0.0012      |
|    reward               | 0.014236515  |
|    value_loss           | 0.0184       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.818        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 232          |
|    time_elapsed         | 1429         |
|    total_timesteps      | 237568       |
| train/                  |              |
|    approx_kl            | 0.0017676903 |
|    clip_fraction        | 0.0157       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.27        |
|    explained_variance   | 0.0013       |
|    learning_rate        | 0.0002       |
|    loss                 | -0.017       |
|    n_updates            | 2310         |
|    policy_gradient_loss | -0.00232     |
|    reward               | -0.2699526   |
|    value_loss           | 0.0177       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.822         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 233           |
|    time_elapsed         | 1435          |
|    total_timesteps      | 238592        |
| train/                  |               |
|    approx_kl            | 0.00078425964 |
|    clip_fraction        | 0.00664       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.248        |
|    explained_variance   | 0.000752      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00147      |
|    n_updates            | 2320          |
|    policy_gradient_loss | -0.00106      |
|    reward               | 0.00071871455 |
|    value_loss           | 0.0244        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 234          |
|    time_elapsed         | 1441         |
|    total_timesteps      | 239616       |
| train/                  |              |
|    approx_kl            | 0.0005350602 |
|    clip_fraction        | 0.0105       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | 0.000204     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00468      |
|    n_updates            | 2330         |
|    policy_gradient_loss | -0.00178     |
|    reward               | 0.0          |
|    value_loss           | 0.0201       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 236          |
|    time_elapsed         | 1453         |
|    total_timesteps      | 241664       |
| train/                  |              |
|    approx_kl            | 0.0004483891 |
|    clip_fraction        | 0.00781      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.208       |
|    explained_variance   | -0.000727    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00242      |
|    n_updates            | 2350         |
|    policy_gradient_loss | -0.00159     |
|    reward               | 0.025374921  |
|    value_loss           | 0.0283       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 237          |
|    time_elapsed         | 1460         |
|    total_timesteps      | 242688       |
| train/                  |              |
|    approx_kl            | 0.0012950447 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | -0.000733    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0109       |
|    n_updates            | 2360         |
|    policy_gradient_loss | -8.14e-05    |
|    reward               | 0.012721333  |
|    value_loss           | 0.0303       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.844         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 238           |
|    time_elapsed         | 1466          |
|    total_timesteps      | 243712        |
| train/                  |               |
|    approx_kl            | 0.00014744687 |
|    clip_fraction        | 0.00313       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.238        |
|    explained_variance   | 0.00103       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00569       |
|    n_updates            | 2370          |
|    policy_gradient_loss | 0.000154      |
|    reward               | -0.025406925  |
|    value_loss           | 0.0233        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 239          |
|    time_elapsed         | 1472         |
|    total_timesteps      | 244736       |
| train/                  |              |
|    approx_kl            | 0.0006168618 |
|    clip_fraction        | 0.0153       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.276       |
|    explained_variance   | 0.000209     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0148       |
|    n_updates            | 2380         |
|    policy_gradient_loss | 0.000491     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0248       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 240          |
|    time_elapsed         | 1478         |
|    total_timesteps      | 245760       |
| train/                  |              |
|    approx_kl            | 0.0011851387 |
|    clip_fraction        | 0.00947      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.299       |
|    explained_variance   | 0.00167      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0107      |
|    n_updates            | 2390         |
|    policy_gradient_loss | 0.000105     |
|    reward               | 0.02691073   |
|    value_loss           | 0.0211       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 241          |
|    time_elapsed         | 1485         |
|    total_timesteps      | 246784       |
| train/                  |              |
|    approx_kl            | 0.0012160067 |
|    clip_fraction        | 0.00566      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.264       |
|    explained_variance   | 0.000303     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.02         |
|    n_updates            | 2400         |
|    policy_gradient_loss | -0.000812    |
|    reward               | 0.07370271   |
|    value_loss           | 0.0261       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 242          |
|    time_elapsed         | 1490         |
|    total_timesteps      | 247808       |
| train/                  |              |
|    approx_kl            | 0.0008759541 |
|    clip_fraction        | 0.00449      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.000628     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00514      |
|    n_updates            | 2410         |
|    policy_gradient_loss | -0.000349    |
|    reward               | 0.0          |
|    value_loss           | 0.022        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 243          |
|    time_elapsed         | 1497         |
|    total_timesteps      | 248832       |
| train/                  |              |
|    approx_kl            | 0.0008127984 |
|    clip_fraction        | 0.0163       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.219       |
|    explained_variance   | 0.000481     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0209       |
|    n_updates            | 2420         |
|    policy_gradient_loss | -0.00164     |
|    reward               | 0.018175716  |
|    value_loss           | 0.0297       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 244          |
|    time_elapsed         | 1503         |
|    total_timesteps      | 249856       |
| train/                  |              |
|    approx_kl            | 0.0011460474 |
|    clip_fraction        | 0.0204       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.00091      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00589      |
|    n_updates            | 2430         |
|    policy_gradient_loss | -0.00291     |
|    reward               | 0.0          |
|    value_loss           | 0.0229       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.845         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 245           |
|    time_elapsed         | 1510          |
|    total_timesteps      | 250880        |
| train/                  |               |
|    approx_kl            | 0.0010760005  |
|    clip_fraction        | 0.0139        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.177        |
|    explained_variance   | 0.000532      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00861       |
|    n_updates            | 2440          |
|    policy_gradient_loss | -0.00243      |
|    reward               | -0.0016053898 |
|    value_loss           | 0.027         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.838         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 246           |
|    time_elapsed         | 1516          |
|    total_timesteps      | 251904        |
| train/                  |               |
|    approx_kl            | 0.00053850893 |
|    clip_fraction        | 0.0187        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.204        |
|    explained_variance   | 0.000709      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0109        |
|    n_updates            | 2450          |
|    policy_gradient_loss | -0.000808     |
|    reward               | 0.00050837983 |
|    value_loss           | 0.0268        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.829        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 247          |
|    time_elapsed         | 1521         |
|    total_timesteps      | 252928       |
| train/                  |              |
|    approx_kl            | 0.0008703089 |
|    clip_fraction        | 0.0208       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.232       |
|    explained_variance   | 0.000416     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00815      |
|    n_updates            | 2460         |
|    policy_gradient_loss | -0.000404    |
|    reward               | 0.010072473  |
|    value_loss           | 0.0229       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.826        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 248          |
|    time_elapsed         | 1527         |
|    total_timesteps      | 253952       |
| train/                  |              |
|    approx_kl            | 0.0010088126 |
|    clip_fraction        | 0.0166       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.003        |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000404     |
|    n_updates            | 2470         |
|    policy_gradient_loss | -0.000896    |
|    reward               | 0.029241676  |
|    value_loss           | 0.0253       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.834         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 249           |
|    time_elapsed         | 1534          |
|    total_timesteps      | 254976        |
| train/                  |               |
|    approx_kl            | 0.00038377487 |
|    clip_fraction        | 0.0082        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.26         |
|    explained_variance   | 0.000995      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0112        |
|    n_updates            | 2480          |
|    policy_gradient_loss | 0.000618      |
|    reward               | 0.0           |
|    value_loss           | 0.024         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 250          |
|    time_elapsed         | 1540         |
|    total_timesteps      | 256000       |
| train/                  |              |
|    approx_kl            | 0.0011413882 |
|    clip_fraction        | 0.0152       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.291       |
|    explained_variance   | 0.00356      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0125       |
|    n_updates            | 2490         |
|    policy_gradient_loss | 0.000403     |
|    reward               | -0.017534487 |
|    value_loss           | 0.0195       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 251          |
|    time_elapsed         | 1546         |
|    total_timesteps      | 257024       |
| train/                  |              |
|    approx_kl            | 0.0020641629 |
|    clip_fraction        | 0.0215       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.265       |
|    explained_variance   | 0.00121      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00604     |
|    n_updates            | 2500         |
|    policy_gradient_loss | -0.00341     |
|    reward               | 0.0          |
|    value_loss           | 0.0181       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.836         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 252           |
|    time_elapsed         | 1552          |
|    total_timesteps      | 258048        |
| train/                  |               |
|    approx_kl            | 0.00075277046 |
|    clip_fraction        | 0.013         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.236        |
|    explained_variance   | 0.00117       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00284       |
|    n_updates            | 2510          |
|    policy_gradient_loss | -0.00247      |
|    reward               | -0.34777367   |
|    value_loss           | 0.0227        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 253          |
|    time_elapsed         | 1558         |
|    total_timesteps      | 259072       |
| train/                  |              |
|    approx_kl            | 0.0009112849 |
|    clip_fraction        | 0.0127       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 4.1e-05      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00788      |
|    n_updates            | 2520         |
|    policy_gradient_loss | -0.000519    |
|    reward               | 0.012534712  |
|    value_loss           | 0.0217       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 254          |
|    time_elapsed         | 1564         |
|    total_timesteps      | 260096       |
| train/                  |              |
|    approx_kl            | 0.0004950909 |
|    clip_fraction        | 0.00342      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.000518     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00301      |
|    n_updates            | 2530         |
|    policy_gradient_loss | -0.000533    |
|    reward               | -0.1117932   |
|    value_loss           | 0.0221       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 255          |
|    time_elapsed         | 1570         |
|    total_timesteps      | 261120       |
| train/                  |              |
|    approx_kl            | 0.0008636546 |
|    clip_fraction        | 0.00625      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.000879     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0064       |
|    n_updates            | 2540         |
|    policy_gradient_loss | -0.000207    |
|    reward               | 0.0          |
|    value_loss           | 0.0227       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 256          |
|    time_elapsed         | 1576         |
|    total_timesteps      | 262144       |
| train/                  |              |
|    approx_kl            | 0.0010228944 |
|    clip_fraction        | 0.0294       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.000239     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00171      |
|    n_updates            | 2550         |
|    policy_gradient_loss | -0.00287     |
|    reward               | 0.005140901  |
|    value_loss           | 0.0413       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.841         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 257           |
|    time_elapsed         | 1582          |
|    total_timesteps      | 263168        |
| train/                  |               |
|    approx_kl            | 0.00079261925 |
|    clip_fraction        | 0.0187        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.207        |
|    explained_variance   | 0.000461      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00475      |
|    n_updates            | 2560          |
|    policy_gradient_loss | -0.00275      |
|    reward               | 0.017475843   |
|    value_loss           | 0.0275        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 258          |
|    time_elapsed         | 1588         |
|    total_timesteps      | 264192       |
| train/                  |              |
|    approx_kl            | 0.0020313803 |
|    clip_fraction        | 0.0193       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.251       |
|    explained_variance   | 0.000612     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0104       |
|    n_updates            | 2570         |
|    policy_gradient_loss | -0.00069     |
|    reward               | 0.010763548  |
|    value_loss           | 0.0254       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 259          |
|    time_elapsed         | 1594         |
|    total_timesteps      | 265216       |
| train/                  |              |
|    approx_kl            | 0.0006589128 |
|    clip_fraction        | 0.0114       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.249       |
|    explained_variance   | 0.000808     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00111     |
|    n_updates            | 2580         |
|    policy_gradient_loss | -0.000542    |
|    reward               | 0.0055839494 |
|    value_loss           | 0.0219       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.857        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 260          |
|    time_elapsed         | 1600         |
|    total_timesteps      | 266240       |
| train/                  |              |
|    approx_kl            | 0.0012651677 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.251       |
|    explained_variance   | 0.00096      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0177       |
|    n_updates            | 2590         |
|    policy_gradient_loss | -0.0012      |
|    reward               | 0.0          |
|    value_loss           | 0.0249       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.847         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 261           |
|    time_elapsed         | 1607          |
|    total_timesteps      | 267264        |
| train/                  |               |
|    approx_kl            | 0.00032524118 |
|    clip_fraction        | 0.00205       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.259        |
|    explained_variance   | 0.000156      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00537       |
|    n_updates            | 2600          |
|    policy_gradient_loss | 0.000397      |
|    reward               | 0.0           |
|    value_loss           | 0.0225        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 262          |
|    time_elapsed         | 1613         |
|    total_timesteps      | 268288       |
| train/                  |              |
|    approx_kl            | 0.0013454596 |
|    clip_fraction        | 0.0194       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.225       |
|    explained_variance   | 0.000855     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00535      |
|    n_updates            | 2610         |
|    policy_gradient_loss | -0.00367     |
|    reward               | -0.017459273 |
|    value_loss           | 0.0222       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 263          |
|    time_elapsed         | 1619         |
|    total_timesteps      | 269312       |
| train/                  |              |
|    approx_kl            | 0.0006821329 |
|    clip_fraction        | 0.0132       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.205       |
|    explained_variance   | 0.000907     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00391      |
|    n_updates            | 2620         |
|    policy_gradient_loss | -0.00242     |
|    reward               | 0.011676818  |
|    value_loss           | 0.0311       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 264          |
|    time_elapsed         | 1625         |
|    total_timesteps      | 270336       |
| train/                  |              |
|    approx_kl            | 0.0005174292 |
|    clip_fraction        | 0.00449      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.194       |
|    explained_variance   | 0.000189     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00984      |
|    n_updates            | 2630         |
|    policy_gradient_loss | -0.000557    |
|    reward               | 0.0          |
|    value_loss           | 0.0225       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 265          |
|    time_elapsed         | 1631         |
|    total_timesteps      | 271360       |
| train/                  |              |
|    approx_kl            | 0.0009104232 |
|    clip_fraction        | 0.0163       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.168       |
|    explained_variance   | 0.00109      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00467      |
|    n_updates            | 2640         |
|    policy_gradient_loss | -0.0034      |
|    reward               | 0.0023470155 |
|    value_loss           | 0.0191       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.857        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 266          |
|    time_elapsed         | 1637         |
|    total_timesteps      | 272384       |
| train/                  |              |
|    approx_kl            | 0.0009799182 |
|    clip_fraction        | 0.0115       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.161       |
|    explained_variance   | 0.00062      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0113       |
|    n_updates            | 2650         |
|    policy_gradient_loss | -0.00131     |
|    reward               | -0.020051284 |
|    value_loss           | 0.0295       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.86          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 267           |
|    time_elapsed         | 1644          |
|    total_timesteps      | 273408        |
| train/                  |               |
|    approx_kl            | 0.00088930596 |
|    clip_fraction        | 0.00986       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.168        |
|    explained_variance   | 0.00052       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00597       |
|    n_updates            | 2660          |
|    policy_gradient_loss | -0.000529     |
|    reward               | 0.0038381438  |
|    value_loss           | 0.0285        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.856         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 268           |
|    time_elapsed         | 1650          |
|    total_timesteps      | 274432        |
| train/                  |               |
|    approx_kl            | 0.00090283196 |
|    clip_fraction        | 0.0189        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.188        |
|    explained_variance   | 0.000871      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0177        |
|    n_updates            | 2670          |
|    policy_gradient_loss | 1.61e-05      |
|    reward               | 0.007481619   |
|    value_loss           | 0.0339        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.853        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 269          |
|    time_elapsed         | 1656         |
|    total_timesteps      | 275456       |
| train/                  |              |
|    approx_kl            | 0.0006366143 |
|    clip_fraction        | 0.00967      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.179       |
|    explained_variance   | 0.000861     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00791      |
|    n_updates            | 2680         |
|    policy_gradient_loss | -0.000498    |
|    reward               | -0.153939    |
|    value_loss           | 0.0314       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.849         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 270           |
|    time_elapsed         | 1662          |
|    total_timesteps      | 276480        |
| train/                  |               |
|    approx_kl            | 0.00045470137 |
|    clip_fraction        | 0.018         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.205        |
|    explained_variance   | 0.000645      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0041        |
|    n_updates            | 2690          |
|    policy_gradient_loss | -0.000293     |
|    reward               | -0.006944524  |
|    value_loss           | 0.0207        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.846         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 271           |
|    time_elapsed         | 1668          |
|    total_timesteps      | 277504        |
| train/                  |               |
|    approx_kl            | 0.00041229656 |
|    clip_fraction        | 0.0082        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.206        |
|    explained_variance   | 0.000906      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0007       |
|    n_updates            | 2700          |
|    policy_gradient_loss | 0.000262      |
|    reward               | 0.011988248   |
|    value_loss           | 0.026         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.839         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 272           |
|    time_elapsed         | 1675          |
|    total_timesteps      | 278528        |
| train/                  |               |
|    approx_kl            | 0.00050877815 |
|    clip_fraction        | 0.00752       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.197        |
|    explained_variance   | 0.000624      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00902       |
|    n_updates            | 2710          |
|    policy_gradient_loss | -0.000497     |
|    reward               | 0.010214516   |
|    value_loss           | 0.0281        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 273          |
|    time_elapsed         | 1681         |
|    total_timesteps      | 279552       |
| train/                  |              |
|    approx_kl            | 0.0009510463 |
|    clip_fraction        | 0.0219       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.000154     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0024      |
|    n_updates            | 2720         |
|    policy_gradient_loss | -0.000199    |
|    reward               | 0.01030393   |
|    value_loss           | 0.0435       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 274          |
|    time_elapsed         | 1688         |
|    total_timesteps      | 280576       |
| train/                  |              |
|    approx_kl            | 0.0012184638 |
|    clip_fraction        | 0.0132       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.198       |
|    explained_variance   | 0.000532     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00128     |
|    n_updates            | 2730         |
|    policy_gradient_loss | -0.00214     |
|    reward               | -0.013988468 |
|    value_loss           | 0.0189       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 275          |
|    time_elapsed         | 1693         |
|    total_timesteps      | 281600       |
| train/                  |              |
|    approx_kl            | 0.0006170273 |
|    clip_fraction        | 0.00547      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.186       |
|    explained_variance   | 0.000834     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00472      |
|    n_updates            | 2740         |
|    policy_gradient_loss | -0.000238    |
|    reward               | 0.0112536205 |
|    value_loss           | 0.0233       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.826         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 276           |
|    time_elapsed         | 1700          |
|    total_timesteps      | 282624        |
| train/                  |               |
|    approx_kl            | 0.0005430467  |
|    clip_fraction        | 0.0151        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.192        |
|    explained_variance   | 6.04e-05      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0128        |
|    n_updates            | 2750          |
|    policy_gradient_loss | -0.00131      |
|    reward               | -0.0007128663 |
|    value_loss           | 0.0345        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.824        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 277          |
|    time_elapsed         | 1707         |
|    total_timesteps      | 283648       |
| train/                  |              |
|    approx_kl            | 0.0010815107 |
|    clip_fraction        | 0.0164       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.159       |
|    explained_variance   | 0.0019       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0083       |
|    n_updates            | 2760         |
|    policy_gradient_loss | -0.00275     |
|    reward               | -0.025608404 |
|    value_loss           | 0.0245       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.82          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 278           |
|    time_elapsed         | 1713          |
|    total_timesteps      | 284672        |
| train/                  |               |
|    approx_kl            | 0.00060399546 |
|    clip_fraction        | 0.0111        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.177        |
|    explained_variance   | 0.000838      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00772       |
|    n_updates            | 2770          |
|    policy_gradient_loss | -0.000677     |
|    reward               | 0.004595032   |
|    value_loss           | 0.0312        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.819         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 279           |
|    time_elapsed         | 1719          |
|    total_timesteps      | 285696        |
| train/                  |               |
|    approx_kl            | 0.00046175596 |
|    clip_fraction        | 0.00986       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.203        |
|    explained_variance   | 0.00218       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0187        |
|    n_updates            | 2780          |
|    policy_gradient_loss | 0.000645      |
|    reward               | 0.08602249    |
|    value_loss           | 0.0258        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.818        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 280          |
|    time_elapsed         | 1725         |
|    total_timesteps      | 286720       |
| train/                  |              |
|    approx_kl            | 0.0008075678 |
|    clip_fraction        | 0.00908      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | 0.000371     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0433       |
|    n_updates            | 2790         |
|    policy_gradient_loss | 0.000632     |
|    reward               | -0.15344605  |
|    value_loss           | 0.0406       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.808         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 281           |
|    time_elapsed         | 1731          |
|    total_timesteps      | 287744        |
| train/                  |               |
|    approx_kl            | 0.0005752467  |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.249        |
|    explained_variance   | 0.00288       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.019         |
|    n_updates            | 2800          |
|    policy_gradient_loss | 0.000239      |
|    reward               | 0.00088355783 |
|    value_loss           | 0.0244        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.798         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 282           |
|    time_elapsed         | 1737          |
|    total_timesteps      | 288768        |
| train/                  |               |
|    approx_kl            | 0.00057605084 |
|    clip_fraction        | 0.0179        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.221        |
|    explained_variance   | 0.00054       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000902      |
|    n_updates            | 2810          |
|    policy_gradient_loss | -0.00251      |
|    reward               | -0.023407778  |
|    value_loss           | 0.0219        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 283          |
|    time_elapsed         | 1743         |
|    total_timesteps      | 289792       |
| train/                  |              |
|    approx_kl            | 0.0002533669 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.000611     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00725      |
|    n_updates            | 2820         |
|    policy_gradient_loss | 6.28e-05     |
|    reward               | 0.005941765  |
|    value_loss           | 0.0249       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 284          |
|    time_elapsed         | 1749         |
|    total_timesteps      | 290816       |
| train/                  |              |
|    approx_kl            | 0.0009259172 |
|    clip_fraction        | 0.00908      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.252       |
|    explained_variance   | 0.000897     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0159       |
|    n_updates            | 2830         |
|    policy_gradient_loss | -0.000668    |
|    reward               | 0.0          |
|    value_loss           | 0.0225       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.81         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 285          |
|    time_elapsed         | 1756         |
|    total_timesteps      | 291840       |
| train/                  |              |
|    approx_kl            | 0.0011730407 |
|    clip_fraction        | 0.0114       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.219       |
|    explained_variance   | 0.000549     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0166       |
|    n_updates            | 2840         |
|    policy_gradient_loss | -0.00246     |
|    reward               | 0.086244985  |
|    value_loss           | 0.0271       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.808        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 286          |
|    time_elapsed         | 1762         |
|    total_timesteps      | 292864       |
| train/                  |              |
|    approx_kl            | 0.0003751634 |
|    clip_fraction        | 0.0103       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.23        |
|    explained_variance   | -0.000391    |
|    learning_rate        | 0.0002       |
|    loss                 | -9e-05       |
|    n_updates            | 2850         |
|    policy_gradient_loss | -8.74e-05    |
|    reward               | -0.07013038  |
|    value_loss           | 0.0225       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.804        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 287          |
|    time_elapsed         | 1767         |
|    total_timesteps      | 293888       |
| train/                  |              |
|    approx_kl            | 0.0011993657 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.195       |
|    explained_variance   | 0.000393     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0089       |
|    n_updates            | 2860         |
|    policy_gradient_loss | -0.00328     |
|    reward               | 0.03932904   |
|    value_loss           | 0.0214       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.799         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 288           |
|    time_elapsed         | 1773          |
|    total_timesteps      | 294912        |
| train/                  |               |
|    approx_kl            | 0.00033270544 |
|    clip_fraction        | 0.00381       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.171        |
|    explained_variance   | 0.000183      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00928       |
|    n_updates            | 2870          |
|    policy_gradient_loss | -0.00102      |
|    reward               | -0.029306779  |
|    value_loss           | 0.0347        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.792         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 289           |
|    time_elapsed         | 1780          |
|    total_timesteps      | 295936        |
| train/                  |               |
|    approx_kl            | 0.00056364364 |
|    clip_fraction        | 0.00937       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.169        |
|    explained_variance   | 0.000461      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00438       |
|    n_updates            | 2880          |
|    policy_gradient_loss | -0.000593     |
|    reward               | -0.023227466  |
|    value_loss           | 0.0268        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.787         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 290           |
|    time_elapsed         | 1785          |
|    total_timesteps      | 296960        |
| train/                  |               |
|    approx_kl            | 0.00054336985 |
|    clip_fraction        | 0.00684       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.151        |
|    explained_variance   | 1.78e-05      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0133        |
|    n_updates            | 2890          |
|    policy_gradient_loss | -0.000992     |
|    reward               | 0.04795181    |
|    value_loss           | 0.0456        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.78          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 291           |
|    time_elapsed         | 1792          |
|    total_timesteps      | 297984        |
| train/                  |               |
|    approx_kl            | 0.00055780535 |
|    clip_fraction        | 0.00898       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.154        |
|    explained_variance   | 0.000355      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0191        |
|    n_updates            | 2900          |
|    policy_gradient_loss | -0.000578     |
|    reward               | 0.00018805108 |
|    value_loss           | 0.0287        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.777        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 292          |
|    time_elapsed         | 1797         |
|    total_timesteps      | 299008       |
| train/                  |              |
|    approx_kl            | 0.0009855672 |
|    clip_fraction        | 0.0043       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.165       |
|    explained_variance   | 0.000594     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0122       |
|    n_updates            | 2910         |
|    policy_gradient_loss | -0.000347    |
|    reward               | 0.015635557  |
|    value_loss           | 0.0367       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.777         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 293           |
|    time_elapsed         | 1803          |
|    total_timesteps      | 300032        |
| train/                  |               |
|    approx_kl            | 0.00055259257 |
|    clip_fraction        | 0.0121        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.199        |
|    explained_variance   | 0.00103       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00443       |
|    n_updates            | 2920          |
|    policy_gradient_loss | -5.85e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.0209        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.786        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 294          |
|    time_elapsed         | 1809         |
|    total_timesteps      | 301056       |
| train/                  |              |
|    approx_kl            | 0.0004923203 |
|    clip_fraction        | 0.00684      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 0.00173      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0183       |
|    n_updates            | 2930         |
|    policy_gradient_loss | 0.000277     |
|    reward               | 0.0          |
|    value_loss           | 0.0329       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.778        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 295          |
|    time_elapsed         | 1816         |
|    total_timesteps      | 302080       |
| train/                  |              |
|    approx_kl            | 0.0012127343 |
|    clip_fraction        | 0.0267       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.00128      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00822      |
|    n_updates            | 2940         |
|    policy_gradient_loss | -0.00276     |
|    reward               | 0.033484463  |
|    value_loss           | 0.0294       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.782        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 296          |
|    time_elapsed         | 1822         |
|    total_timesteps      | 303104       |
| train/                  |              |
|    approx_kl            | 0.0005696336 |
|    clip_fraction        | 0.0082       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.000609     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00356      |
|    n_updates            | 2950         |
|    policy_gradient_loss | 0.00116      |
|    reward               | 0.0          |
|    value_loss           | 0.027        |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.784       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 297         |
|    time_elapsed         | 1828        |
|    total_timesteps      | 304128      |
| train/                  |             |
|    approx_kl            | 0.000431414 |
|    clip_fraction        | 0.00449     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.206      |
|    explained_variance   | -0.000562   |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00424     |
|    n_updates            | 2960        |
|    policy_gradient_loss | -0.000607   |
|    reward               | 0.0         |
|    value_loss           | 0.0255      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.784        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 298          |
|    time_elapsed         | 1834         |
|    total_timesteps      | 305152       |
| train/                  |              |
|    approx_kl            | 0.0008508959 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.000145     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00519      |
|    n_updates            | 2970         |
|    policy_gradient_loss | -0.000996    |
|    reward               | 0.0          |
|    value_loss           | 0.0244       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.79        |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 299         |
|    time_elapsed         | 1840        |
|    total_timesteps      | 306176      |
| train/                  |             |
|    approx_kl            | 0.001507367 |
|    clip_fraction        | 0.023       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.217      |
|    explained_variance   | 0.000649    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00412    |
|    n_updates            | 2980        |
|    policy_gradient_loss | -0.00345    |
|    reward               | 0.0         |
|    value_loss           | 0.0323      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.785        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 300          |
|    time_elapsed         | 1846         |
|    total_timesteps      | 307200       |
| train/                  |              |
|    approx_kl            | 0.0014179592 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.194       |
|    explained_variance   | 0.00155      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00212     |
|    n_updates            | 2990         |
|    policy_gradient_loss | -0.00276     |
|    reward               | 0.0          |
|    value_loss           | 0.0188       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.779         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 301           |
|    time_elapsed         | 1852          |
|    total_timesteps      | 308224        |
| train/                  |               |
|    approx_kl            | 0.00047279103 |
|    clip_fraction        | 0.0119        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.222        |
|    explained_variance   | 0.00112       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0205        |
|    n_updates            | 3000          |
|    policy_gradient_loss | 0.000274      |
|    reward               | 0.025657088   |
|    value_loss           | 0.0331        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.784         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 302           |
|    time_elapsed         | 1858          |
|    total_timesteps      | 309248        |
| train/                  |               |
|    approx_kl            | 0.00045332208 |
|    clip_fraction        | 0.00674       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.233        |
|    explained_variance   | 0.00164       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00385       |
|    n_updates            | 3010          |
|    policy_gradient_loss | 9.29e-05      |
|    reward               | 0.0           |
|    value_loss           | 0.0235        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.767        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 303          |
|    time_elapsed         | 1864         |
|    total_timesteps      | 310272       |
| train/                  |              |
|    approx_kl            | 0.0010152118 |
|    clip_fraction        | 0.0182       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | -0.00066     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00969     |
|    n_updates            | 3020         |
|    policy_gradient_loss | -0.00207     |
|    reward               | 0.0116402535 |
|    value_loss           | 0.0225       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 304          |
|    time_elapsed         | 1870         |
|    total_timesteps      | 311296       |
| train/                  |              |
|    approx_kl            | 0.0007827825 |
|    clip_fraction        | 0.00859      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 0.000412     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00153      |
|    n_updates            | 3030         |
|    policy_gradient_loss | -0.00105     |
|    reward               | 0.0          |
|    value_loss           | 0.0251       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.767        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 305          |
|    time_elapsed         | 1876         |
|    total_timesteps      | 312320       |
| train/                  |              |
|    approx_kl            | 0.0004528652 |
|    clip_fraction        | 0.0084       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.201       |
|    explained_variance   | 0.00098      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00175      |
|    n_updates            | 3040         |
|    policy_gradient_loss | -0.00149     |
|    reward               | -0.0609851   |
|    value_loss           | 0.0244       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 306          |
|    time_elapsed         | 1883         |
|    total_timesteps      | 313344       |
| train/                  |              |
|    approx_kl            | 0.0008065027 |
|    clip_fraction        | 0.00596      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.177       |
|    explained_variance   | 0.00082      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00639      |
|    n_updates            | 3050         |
|    policy_gradient_loss | -0.0013      |
|    reward               | 0.06667855   |
|    value_loss           | 0.0225       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 307          |
|    time_elapsed         | 1889         |
|    total_timesteps      | 314368       |
| train/                  |              |
|    approx_kl            | 0.0004076055 |
|    clip_fraction        | 0.0106       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.2         |
|    explained_variance   | 0.000337     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0168       |
|    n_updates            | 3060         |
|    policy_gradient_loss | 0.000662     |
|    reward               | -0.08360202  |
|    value_loss           | 0.0437       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.765        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 308          |
|    time_elapsed         | 1895         |
|    total_timesteps      | 315392       |
| train/                  |              |
|    approx_kl            | 0.0014588393 |
|    clip_fraction        | 0.0114       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.181       |
|    explained_variance   | 0.000911     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00435      |
|    n_updates            | 3070         |
|    policy_gradient_loss | -0.00179     |
|    reward               | 0.04525412   |
|    value_loss           | 0.0256       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.762         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 309           |
|    time_elapsed         | 1901          |
|    total_timesteps      | 316416        |
| train/                  |               |
|    approx_kl            | 0.00036729785 |
|    clip_fraction        | 0.00752       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.192        |
|    explained_variance   | 0.000253      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0141        |
|    n_updates            | 3080          |
|    policy_gradient_loss | 0.000225      |
|    reward               | 0.0           |
|    value_loss           | 0.0499        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.754         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 310           |
|    time_elapsed         | 1907          |
|    total_timesteps      | 317440        |
| train/                  |               |
|    approx_kl            | 0.00044958084 |
|    clip_fraction        | 0.0127        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.213        |
|    explained_variance   | 0.000803      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00429       |
|    n_updates            | 3090          |
|    policy_gradient_loss | -0.000268     |
|    reward               | 0.0           |
|    value_loss           | 0.0256        |
-------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 0.752      |
| time/                   |            |
|    fps                  | 166        |
|    iterations           | 311        |
|    time_elapsed         | 1913       |
|    total_timesteps      | 318464     |
| train/                  |            |
|    approx_kl            | 0.00110374 |
|    clip_fraction        | 0.0145     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.206     |
|    explained_variance   | 0.000798   |
|    learning_rate        | 0.0002     |
|    loss                 | 0.0214     |
|    n_updates            | 3100       |
|    policy_gradient_loss | -0.00123   |
|    reward               | 0.0        |
|    value_loss           | 0.0275     |
----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.755         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 312           |
|    time_elapsed         | 1919          |
|    total_timesteps      | 319488        |
| train/                  |               |
|    approx_kl            | 0.00047663198 |
|    clip_fraction        | 0.011         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.226        |
|    explained_variance   | 0.000486      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00352       |
|    n_updates            | 3110          |
|    policy_gradient_loss | 8.56e-05      |
|    reward               | 0.0           |
|    value_loss           | 0.0226        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.746         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 313           |
|    time_elapsed         | 1926          |
|    total_timesteps      | 320512        |
| train/                  |               |
|    approx_kl            | 0.00020217797 |
|    clip_fraction        | 0.00508       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.242        |
|    explained_variance   | 0.00212       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00387       |
|    n_updates            | 3120          |
|    policy_gradient_loss | 0.000661      |
|    reward               | -0.08657094   |
|    value_loss           | 0.0213        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.745        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 314          |
|    time_elapsed         | 1932         |
|    total_timesteps      | 321536       |
| train/                  |              |
|    approx_kl            | 0.0010099465 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.000518     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0133       |
|    n_updates            | 3130         |
|    policy_gradient_loss | -0.00137     |
|    reward               | -0.11449633  |
|    value_loss           | 0.0322       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.738        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 315          |
|    time_elapsed         | 1939         |
|    total_timesteps      | 322560       |
| train/                  |              |
|    approx_kl            | 0.0003188309 |
|    clip_fraction        | 0.0109       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.254       |
|    explained_variance   | 0.00053      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000564    |
|    n_updates            | 3140         |
|    policy_gradient_loss | 0.000782     |
|    reward               | 0.0016081397 |
|    value_loss           | 0.0317       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.728        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 316          |
|    time_elapsed         | 1945         |
|    total_timesteps      | 323584       |
| train/                  |              |
|    approx_kl            | 0.0012069254 |
|    clip_fraction        | 0.0148       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.000526     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00174      |
|    n_updates            | 3150         |
|    policy_gradient_loss | -0.00228     |
|    reward               | 0.0064806035 |
|    value_loss           | 0.0245       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.732         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 317           |
|    time_elapsed         | 1951          |
|    total_timesteps      | 324608        |
| train/                  |               |
|    approx_kl            | 0.00024823542 |
|    clip_fraction        | 0.0125        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.239        |
|    explained_variance   | 0.000559      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0144        |
|    n_updates            | 3160          |
|    policy_gradient_loss | 0.000329      |
|    reward               | 0.0           |
|    value_loss           | 0.0336        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.728        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 318          |
|    time_elapsed         | 1957         |
|    total_timesteps      | 325632       |
| train/                  |              |
|    approx_kl            | 0.0010675956 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.214       |
|    explained_variance   | 0.0011       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00551      |
|    n_updates            | 3170         |
|    policy_gradient_loss | -0.00256     |
|    reward               | 0.0          |
|    value_loss           | 0.021        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 319          |
|    time_elapsed         | 1964         |
|    total_timesteps      | 326656       |
| train/                  |              |
|    approx_kl            | 0.0002664202 |
|    clip_fraction        | 0.000195     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 0.000152     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00698      |
|    n_updates            | 3180         |
|    policy_gradient_loss | 0.000473     |
|    reward               | 0.014673163  |
|    value_loss           | 0.0271       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.725        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 320          |
|    time_elapsed         | 1970         |
|    total_timesteps      | 327680       |
| train/                  |              |
|    approx_kl            | 0.001601697  |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.254       |
|    explained_variance   | 0.000462     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00158      |
|    n_updates            | 3190         |
|    policy_gradient_loss | 0.000365     |
|    reward               | -0.052437905 |
|    value_loss           | 0.0242       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.72         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 321          |
|    time_elapsed         | 1977         |
|    total_timesteps      | 328704       |
| train/                  |              |
|    approx_kl            | 0.0012887989 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.223       |
|    explained_variance   | 0.000853     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00356     |
|    n_updates            | 3200         |
|    policy_gradient_loss | -0.00267     |
|    reward               | 0.0          |
|    value_loss           | 0.019        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.733         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 322           |
|    time_elapsed         | 1983          |
|    total_timesteps      | 329728        |
| train/                  |               |
|    approx_kl            | 0.00012882386 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | 0.000719      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00411       |
|    n_updates            | 3210          |
|    policy_gradient_loss | 0.000207      |
|    reward               | -0.012046644  |
|    value_loss           | 0.0254        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 323          |
|    time_elapsed         | 1989         |
|    total_timesteps      | 330752       |
| train/                  |              |
|    approx_kl            | 0.0006161611 |
|    clip_fraction        | 0.012        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.225       |
|    explained_variance   | 0.000271     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000414     |
|    n_updates            | 3220         |
|    policy_gradient_loss | -0.0011      |
|    reward               | 0.0020275554 |
|    value_loss           | 0.0249       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.735        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 324          |
|    time_elapsed         | 1995         |
|    total_timesteps      | 331776       |
| train/                  |              |
|    approx_kl            | 0.0005118565 |
|    clip_fraction        | 0.00576      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.243       |
|    explained_variance   | 0.000453     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00787      |
|    n_updates            | 3230         |
|    policy_gradient_loss | 0.000544     |
|    reward               | 0.04340106   |
|    value_loss           | 0.0249       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.735         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 325           |
|    time_elapsed         | 2001          |
|    total_timesteps      | 332800        |
| train/                  |               |
|    approx_kl            | 0.00038873288 |
|    clip_fraction        | 0.0105        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.218        |
|    explained_variance   | 0.000626      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00155       |
|    n_updates            | 3240          |
|    policy_gradient_loss | -0.00179      |
|    reward               | -0.008828214  |
|    value_loss           | 0.0253        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.73         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 326          |
|    time_elapsed         | 2007         |
|    total_timesteps      | 333824       |
| train/                  |              |
|    approx_kl            | 0.000858676  |
|    clip_fraction        | 0.0101       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.00104      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00496      |
|    n_updates            | 3250         |
|    policy_gradient_loss | -0.000231    |
|    reward               | 0.0045827967 |
|    value_loss           | 0.0227       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 327          |
|    time_elapsed         | 2014         |
|    total_timesteps      | 334848       |
| train/                  |              |
|    approx_kl            | 0.0006575821 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.243       |
|    explained_variance   | 0.000167     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00222     |
|    n_updates            | 3260         |
|    policy_gradient_loss | -0.0008      |
|    reward               | 0.0          |
|    value_loss           | 0.0212       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 328          |
|    time_elapsed         | 2020         |
|    total_timesteps      | 335872       |
| train/                  |              |
|    approx_kl            | 0.0013715404 |
|    clip_fraction        | 0.0134       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.000485     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00506     |
|    n_updates            | 3270         |
|    policy_gradient_loss | -0.00124     |
|    reward               | 0.012059284  |
|    value_loss           | 0.0244       |
------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1e+03          |
|    ep_rew_mean          | 0.718          |
| time/                   |                |
|    fps                  | 166            |
|    iterations           | 330            |
|    time_elapsed         | 2032           |
|    total_timesteps      | 337920         |
| train/                  |                |
|    approx_kl            | 0.00080562854  |
|    clip_fraction        | 0.0128         |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.191         |
|    explained_variance   | 0.000442       |
|    learning_rate        | 0.0002         |
|    loss                 | -0.00598       |
|    n_updates            | 3290           |
|    policy_gradient_loss | -0.00153       |
|    reward               | -0.00063839735 |
|    value_loss           | 0.0275         |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.718         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 331           |
|    time_elapsed         | 2038          |
|    total_timesteps      | 338944        |
| train/                  |               |
|    approx_kl            | 0.00039182568 |
|    clip_fraction        | 0.00303       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.211        |
|    explained_variance   | 0.00142       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00108       |
|    n_updates            | 3300          |
|    policy_gradient_loss | 0.00111       |
|    reward               | 0.0843649     |
|    value_loss           | 0.0254        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.716        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 332          |
|    time_elapsed         | 2044         |
|    total_timesteps      | 339968       |
| train/                  |              |
|    approx_kl            | 0.0008216212 |
|    clip_fraction        | 0.0166       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.00116      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00392     |
|    n_updates            | 3310         |
|    policy_gradient_loss | -0.00138     |
|    reward               | -0.034238655 |
|    value_loss           | 0.0166       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.71          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 333           |
|    time_elapsed         | 2051          |
|    total_timesteps      | 340992        |
| train/                  |               |
|    approx_kl            | 0.00040662393 |
|    clip_fraction        | 0.0139        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.235        |
|    explained_variance   | 0.00112       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00469       |
|    n_updates            | 3320          |
|    policy_gradient_loss | 0.000955      |
|    reward               | -0.029341651  |
|    value_loss           | 0.0248        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.715        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 334          |
|    time_elapsed         | 2056         |
|    total_timesteps      | 342016       |
| train/                  |              |
|    approx_kl            | 0.0012844699 |
|    clip_fraction        | 0.025        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.239       |
|    explained_variance   | 0.00126      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0168       |
|    n_updates            | 3330         |
|    policy_gradient_loss | -0.00167     |
|    reward               | 0.061369453  |
|    value_loss           | 0.0226       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.711         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 335           |
|    time_elapsed         | 2062          |
|    total_timesteps      | 343040        |
| train/                  |               |
|    approx_kl            | 0.00075574365 |
|    clip_fraction        | 0.0121        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.25         |
|    explained_variance   | -0.000213     |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000879     |
|    n_updates            | 3340          |
|    policy_gradient_loss | -0.000795     |
|    reward               | -0.025728967  |
|    value_loss           | 0.0266        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.714        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 336          |
|    time_elapsed         | 2068         |
|    total_timesteps      | 344064       |
| train/                  |              |
|    approx_kl            | 0.0009236524 |
|    clip_fraction        | 0.00928      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.27        |
|    explained_variance   | 0.000416     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00703      |
|    n_updates            | 3350         |
|    policy_gradient_loss | 0.000582     |
|    reward               | 0.0          |
|    value_loss           | 0.0249       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 337           |
|    time_elapsed         | 2074          |
|    total_timesteps      | 345088        |
| train/                  |               |
|    approx_kl            | 0.00084042165 |
|    clip_fraction        | 0.0138        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.292        |
|    explained_variance   | 0.00114       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000289      |
|    n_updates            | 3360          |
|    policy_gradient_loss | 0.000445      |
|    reward               | 0.0010070207  |
|    value_loss           | 0.0202        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 338           |
|    time_elapsed         | 2080          |
|    total_timesteps      | 346112        |
| train/                  |               |
|    approx_kl            | 0.00052678504 |
|    clip_fraction        | 0.00537       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.26         |
|    explained_variance   | 0.00131       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00656       |
|    n_updates            | 3370          |
|    policy_gradient_loss | -0.000484     |
|    reward               | 0.0           |
|    value_loss           | 0.027         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.719         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 339           |
|    time_elapsed         | 2086          |
|    total_timesteps      | 347136        |
| train/                  |               |
|    approx_kl            | 0.00027566316 |
|    clip_fraction        | 0.00645       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.264        |
|    explained_variance   | 0.00114       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00562       |
|    n_updates            | 3380          |
|    policy_gradient_loss | -0.000297     |
|    reward               | -0.62016195   |
|    value_loss           | 0.0241        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.714       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 340         |
|    time_elapsed         | 2092        |
|    total_timesteps      | 348160      |
| train/                  |             |
|    approx_kl            | 0.001434006 |
|    clip_fraction        | 0.0145      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.229      |
|    explained_variance   | -0.000694   |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00709     |
|    n_updates            | 3390        |
|    policy_gradient_loss | -0.0019     |
|    reward               | 0.0         |
|    value_loss           | 0.0229      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.72         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 341          |
|    time_elapsed         | 2099         |
|    total_timesteps      | 349184       |
| train/                  |              |
|    approx_kl            | 0.0013324206 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.189       |
|    explained_variance   | 0.000649     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0109       |
|    n_updates            | 3400         |
|    policy_gradient_loss | -0.00302     |
|    reward               | 0.025945693  |
|    value_loss           | 0.0175       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.725         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 342           |
|    time_elapsed         | 2105          |
|    total_timesteps      | 350208        |
| train/                  |               |
|    approx_kl            | 0.00046495436 |
|    clip_fraction        | 0.0175        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.208        |
|    explained_variance   | 0.00035       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0162        |
|    n_updates            | 3410          |
|    policy_gradient_loss | 2.49e-05      |
|    reward               | -0.011928633  |
|    value_loss           | 0.0284        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.728         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 343           |
|    time_elapsed         | 2111          |
|    total_timesteps      | 351232        |
| train/                  |               |
|    approx_kl            | 0.00025420566 |
|    clip_fraction        | 0.00518       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.193        |
|    explained_variance   | 0.000817      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00518       |
|    n_updates            | 3420          |
|    policy_gradient_loss | -0.00127      |
|    reward               | 0.0           |
|    value_loss           | 0.0195        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.724         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 344           |
|    time_elapsed         | 2117          |
|    total_timesteps      | 352256        |
| train/                  |               |
|    approx_kl            | 0.00041680358 |
|    clip_fraction        | 0.00752       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.215        |
|    explained_variance   | 0.000707      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0135        |
|    n_updates            | 3430          |
|    policy_gradient_loss | 0.000588      |
|    reward               | 0.048004527   |
|    value_loss           | 0.0341        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.736        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 347          |
|    time_elapsed         | 2135         |
|    total_timesteps      | 355328       |
| train/                  |              |
|    approx_kl            | 0.0010486303 |
|    clip_fraction        | 0.00889      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.000878     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0112      |
|    n_updates            | 3460         |
|    policy_gradient_loss | -0.00181     |
|    reward               | -0.014305373 |
|    value_loss           | 0.0202       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.732        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 348          |
|    time_elapsed         | 2141         |
|    total_timesteps      | 356352       |
| train/                  |              |
|    approx_kl            | 0.0004642519 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.0557       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00467      |
|    n_updates            | 3470         |
|    policy_gradient_loss | -0.000446    |
|    reward               | -0.026577802 |
|    value_loss           | 0.0209       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.729         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 349           |
|    time_elapsed         | 2147          |
|    total_timesteps      | 357376        |
| train/                  |               |
|    approx_kl            | 0.00074449315 |
|    clip_fraction        | 0.0107        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.227        |
|    explained_variance   | 0.000553      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00313       |
|    n_updates            | 3480          |
|    policy_gradient_loss | 8.14e-07      |
|    reward               | 0.02047187    |
|    value_loss           | 0.0264        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.734        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 350          |
|    time_elapsed         | 2153         |
|    total_timesteps      | 358400       |
| train/                  |              |
|    approx_kl            | 0.0005525388 |
|    clip_fraction        | 0.00713      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 0.000202     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00904      |
|    n_updates            | 3490         |
|    policy_gradient_loss | -0.00083     |
|    reward               | 0.0816325    |
|    value_loss           | 0.031        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.729         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 351           |
|    time_elapsed         | 2159          |
|    total_timesteps      | 359424        |
| train/                  |               |
|    approx_kl            | 0.00062457146 |
|    clip_fraction        | 0.00576       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.208        |
|    explained_variance   | 0.000916      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0073        |
|    n_updates            | 3500          |
|    policy_gradient_loss | -0.00094      |
|    reward               | 0.0           |
|    value_loss           | 0.0259        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.731         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 353           |
|    time_elapsed         | 2171          |
|    total_timesteps      | 361472        |
| train/                  |               |
|    approx_kl            | 0.00086536235 |
|    clip_fraction        | 0.00898       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.197        |
|    explained_variance   | 0.000977      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0201        |
|    n_updates            | 3520          |
|    policy_gradient_loss | 0.00095       |
|    reward               | 0.0304644     |
|    value_loss           | 0.0259        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.718        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 354          |
|    time_elapsed         | 2178         |
|    total_timesteps      | 362496       |
| train/                  |              |
|    approx_kl            | 0.0010502877 |
|    clip_fraction        | 0.0194       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.231       |
|    explained_variance   | 0.000181     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0159       |
|    n_updates            | 3530         |
|    policy_gradient_loss | 1.22e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.031        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.715         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 355           |
|    time_elapsed         | 2184          |
|    total_timesteps      | 363520        |
| train/                  |               |
|    approx_kl            | 0.0012743869  |
|    clip_fraction        | 0.0271        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.264        |
|    explained_variance   | 0.000114      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.044         |
|    n_updates            | 3540          |
|    policy_gradient_loss | -0.000735     |
|    reward               | -0.0052591837 |
|    value_loss           | 0.0537        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.704        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 356          |
|    time_elapsed         | 2190         |
|    total_timesteps      | 364544       |
| train/                  |              |
|    approx_kl            | 0.0010649525 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.256       |
|    explained_variance   | 0.00037      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0127       |
|    n_updates            | 3550         |
|    policy_gradient_loss | -0.00161     |
|    reward               | -0.025876109 |
|    value_loss           | 0.0225       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.708         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 357           |
|    time_elapsed         | 2196          |
|    total_timesteps      | 365568        |
| train/                  |               |
|    approx_kl            | 0.0009203546  |
|    clip_fraction        | 0.00752       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.229        |
|    explained_variance   | 0.00068       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00187       |
|    n_updates            | 3560          |
|    policy_gradient_loss | -0.00171      |
|    reward               | 0.00019462116 |
|    value_loss           | 0.0217        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.707        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 358          |
|    time_elapsed         | 2202         |
|    total_timesteps      | 366592       |
| train/                  |              |
|    approx_kl            | 0.0006995776 |
|    clip_fraction        | 0.00996      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.256       |
|    explained_variance   | 0.00091      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00819      |
|    n_updates            | 3570         |
|    policy_gradient_loss | 0.000251     |
|    reward               | 0.007941845  |
|    value_loss           | 0.0242       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.708        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 359          |
|    time_elapsed         | 2208         |
|    total_timesteps      | 367616       |
| train/                  |              |
|    approx_kl            | 0.0011666566 |
|    clip_fraction        | 0.0143       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.246       |
|    explained_variance   | 0.000193     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0109      |
|    n_updates            | 3580         |
|    policy_gradient_loss | -0.00199     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0208       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.711         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 360           |
|    time_elapsed         | 2214          |
|    total_timesteps      | 368640        |
| train/                  |               |
|    approx_kl            | 0.00078329653 |
|    clip_fraction        | 0.00996       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.249        |
|    explained_variance   | 0.00171       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00185      |
|    n_updates            | 3590          |
|    policy_gradient_loss | -0.000624     |
|    reward               | -5.881225e-06 |
|    value_loss           | 0.0193        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 361          |
|    time_elapsed         | 2221         |
|    total_timesteps      | 369664       |
| train/                  |              |
|    approx_kl            | 0.0011533457 |
|    clip_fraction        | 0.0174       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.279       |
|    explained_variance   | 0.00122      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000807     |
|    n_updates            | 3600         |
|    policy_gradient_loss | -0.00036     |
|    reward               | 0.0          |
|    value_loss           | 0.0189       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 362           |
|    time_elapsed         | 2226          |
|    total_timesteps      | 370688        |
| train/                  |               |
|    approx_kl            | 0.00034657732 |
|    clip_fraction        | 0.00186       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.253        |
|    explained_variance   | 0.00078       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000583      |
|    n_updates            | 3610          |
|    policy_gradient_loss | -0.000356     |
|    reward               | 0.03425675    |
|    value_loss           | 0.0244        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.696        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 364          |
|    time_elapsed         | 2239         |
|    total_timesteps      | 372736       |
| train/                  |              |
|    approx_kl            | 0.0017072234 |
|    clip_fraction        | 0.0301       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.267       |
|    explained_variance   | 0.000517     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00444     |
|    n_updates            | 3630         |
|    policy_gradient_loss | -0.00436     |
|    reward               | 0.007609969  |
|    value_loss           | 0.0218       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.696        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 365          |
|    time_elapsed         | 2245         |
|    total_timesteps      | 373760       |
| train/                  |              |
|    approx_kl            | 0.0010600273 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.25        |
|    explained_variance   | 0.00107      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00827      |
|    n_updates            | 3640         |
|    policy_gradient_loss | -0.0014      |
|    reward               | 0.0          |
|    value_loss           | 0.0208       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.706         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 366           |
|    time_elapsed         | 2252          |
|    total_timesteps      | 374784        |
| train/                  |               |
|    approx_kl            | 0.00061635463 |
|    clip_fraction        | 0.00352       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.227        |
|    explained_variance   | 0.000352      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00369      |
|    n_updates            | 3650          |
|    policy_gradient_loss | -0.000447     |
|    reward               | -0.06800889   |
|    value_loss           | 0.0215        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.705         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 367           |
|    time_elapsed         | 2258          |
|    total_timesteps      | 375808        |
| train/                  |               |
|    approx_kl            | 0.00066352315 |
|    clip_fraction        | 0.00742       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.223        |
|    explained_variance   | 0.000322      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00526       |
|    n_updates            | 3660          |
|    policy_gradient_loss | 0.000244      |
|    reward               | 0.0           |
|    value_loss           | 0.0246        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.7           |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 368           |
|    time_elapsed         | 2264          |
|    total_timesteps      | 376832        |
| train/                  |               |
|    approx_kl            | 0.00033718027 |
|    clip_fraction        | 0.00801       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.193        |
|    explained_variance   | 0.000941      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00433       |
|    n_updates            | 3670          |
|    policy_gradient_loss | -0.00178      |
|    reward               | -0.0004138522 |
|    value_loss           | 0.018         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.704        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 369          |
|    time_elapsed         | 2270         |
|    total_timesteps      | 377856       |
| train/                  |              |
|    approx_kl            | 0.0007560491 |
|    clip_fraction        | 0.0126       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.00114      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00578     |
|    n_updates            | 3680         |
|    policy_gradient_loss | -0.00209     |
|    reward               | 0.020476893  |
|    value_loss           | 0.0234       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.7           |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 370           |
|    time_elapsed         | 2276          |
|    total_timesteps      | 378880        |
| train/                  |               |
|    approx_kl            | 0.00060719455 |
|    clip_fraction        | 0.00684       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.204        |
|    explained_variance   | 0.000922      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00461       |
|    n_updates            | 3690          |
|    policy_gradient_loss | 0.000537      |
|    reward               | -0.013937898  |
|    value_loss           | 0.0263        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.71         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 371          |
|    time_elapsed         | 2283         |
|    total_timesteps      | 379904       |
| train/                  |              |
|    approx_kl            | 0.0047494173 |
|    clip_fraction        | 0.0167       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.261       |
|    explained_variance   | -0.0017      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00232      |
|    n_updates            | 3700         |
|    policy_gradient_loss | -0.000488    |
|    reward               | 0.0          |
|    value_loss           | 0.0268       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 372           |
|    time_elapsed         | 2289          |
|    total_timesteps      | 380928        |
| train/                  |               |
|    approx_kl            | 0.0007355644  |
|    clip_fraction        | 0.00283       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.285        |
|    explained_variance   | -0.00042      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0022       |
|    n_updates            | 3710          |
|    policy_gradient_loss | 0.00046       |
|    reward               | -0.0027490542 |
|    value_loss           | 0.0193        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.726        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 373          |
|    time_elapsed         | 2295         |
|    total_timesteps      | 381952       |
| train/                  |              |
|    approx_kl            | 0.0008005757 |
|    clip_fraction        | 0.0144       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.00136      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00593     |
|    n_updates            | 3720         |
|    policy_gradient_loss | -0.000748    |
|    reward               | 0.0008975893 |
|    value_loss           | 0.024        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 374          |
|    time_elapsed         | 2301         |
|    total_timesteps      | 382976       |
| train/                  |              |
|    approx_kl            | 0.0007544647 |
|    clip_fraction        | 0.00889      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.252       |
|    explained_variance   | 0.000358     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00838      |
|    n_updates            | 3730         |
|    policy_gradient_loss | -0.000152    |
|    reward               | 0.0          |
|    value_loss           | 0.023        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.732        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 375          |
|    time_elapsed         | 2307         |
|    total_timesteps      | 384000       |
| train/                  |              |
|    approx_kl            | 0.0010279812 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.248       |
|    explained_variance   | 0.000404     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000754    |
|    n_updates            | 3740         |
|    policy_gradient_loss | -0.000644    |
|    reward               | 0.0          |
|    value_loss           | 0.0241       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.736         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 376           |
|    time_elapsed         | 2314          |
|    total_timesteps      | 385024        |
| train/                  |               |
|    approx_kl            | 0.00058182504 |
|    clip_fraction        | 0.00781       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.223        |
|    explained_variance   | 0.000309      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00722       |
|    n_updates            | 3750          |
|    policy_gradient_loss | -0.00102      |
|    reward               | 0.0037911246  |
|    value_loss           | 0.0235        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 377          |
|    time_elapsed         | 2320         |
|    total_timesteps      | 386048       |
| train/                  |              |
|    approx_kl            | 0.0014728829 |
|    clip_fraction        | 0.02         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.19        |
|    explained_variance   | 0.000476     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000785     |
|    n_updates            | 3760         |
|    policy_gradient_loss | -0.00363     |
|    reward               | 0.0          |
|    value_loss           | 0.0258       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.735         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 378           |
|    time_elapsed         | 2326          |
|    total_timesteps      | 387072        |
| train/                  |               |
|    approx_kl            | 0.00023002672 |
|    clip_fraction        | 0.00791       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.2          |
|    explained_variance   | 0.00151       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0211        |
|    n_updates            | 3770          |
|    policy_gradient_loss | -9.16e-05     |
|    reward               | 0.011300398   |
|    value_loss           | 0.0314        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 380          |
|    time_elapsed         | 2339         |
|    total_timesteps      | 389120       |
| train/                  |              |
|    approx_kl            | 0.0007763592 |
|    clip_fraction        | 0.016        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | 0.000524     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00352      |
|    n_updates            | 3790         |
|    policy_gradient_loss | 0.000265     |
|    reward               | 0.0          |
|    value_loss           | 0.0271       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.741        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 381          |
|    time_elapsed         | 2345         |
|    total_timesteps      | 390144       |
| train/                  |              |
|    approx_kl            | 0.0018795998 |
|    clip_fraction        | 0.00889      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.000888     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00862      |
|    n_updates            | 3800         |
|    policy_gradient_loss | -0.00186     |
|    reward               | 0.013111487  |
|    value_loss           | 0.0225       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.745         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 382           |
|    time_elapsed         | 2351          |
|    total_timesteps      | 391168        |
| train/                  |               |
|    approx_kl            | 0.00071116956 |
|    clip_fraction        | 0.00654       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.214        |
|    explained_variance   | 0.00163       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00538       |
|    n_updates            | 3810          |
|    policy_gradient_loss | -0.000371     |
|    reward               | -0.0022412615 |
|    value_loss           | 0.0283        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.741        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 383          |
|    time_elapsed         | 2356         |
|    total_timesteps      | 392192       |
| train/                  |              |
|    approx_kl            | 0.0005750973 |
|    clip_fraction        | 0.00361      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 0.000591     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00283     |
|    n_updates            | 3820         |
|    policy_gradient_loss | 0.000218     |
|    reward               | 0.023319943  |
|    value_loss           | 0.0286       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.743         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 384           |
|    time_elapsed         | 2363          |
|    total_timesteps      | 393216        |
| train/                  |               |
|    approx_kl            | 0.00070930313 |
|    clip_fraction        | 0.0188        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.228        |
|    explained_variance   | 0.000388      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0019       |
|    n_updates            | 3830          |
|    policy_gradient_loss | -0.00232      |
|    reward               | 0.012164862   |
|    value_loss           | 0.0258        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.753         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 385           |
|    time_elapsed         | 2368          |
|    total_timesteps      | 394240        |
| train/                  |               |
|    approx_kl            | 0.00013508368 |
|    clip_fraction        | 0.00898       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.251        |
|    explained_variance   | 0.00232       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0027        |
|    n_updates            | 3840          |
|    policy_gradient_loss | 0.00092       |
|    reward               | 0.07181089    |
|    value_loss           | 0.0278        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.761         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 386           |
|    time_elapsed         | 2375          |
|    total_timesteps      | 395264        |
| train/                  |               |
|    approx_kl            | 0.00010656763 |
|    clip_fraction        | 0.0041        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.221        |
|    explained_variance   | 0.00033       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00334       |
|    n_updates            | 3850          |
|    policy_gradient_loss | -0.00103      |
|    reward               | 0.0           |
|    value_loss           | 0.0197        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.771       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 387         |
|    time_elapsed         | 2381        |
|    total_timesteps      | 396288      |
| train/                  |             |
|    approx_kl            | 0.001164339 |
|    clip_fraction        | 0.0145      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.212      |
|    explained_variance   | 0.00203     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.003       |
|    n_updates            | 3860        |
|    policy_gradient_loss | -0.00212    |
|    reward               | 0.0         |
|    value_loss           | 0.0205      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.774        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 388          |
|    time_elapsed         | 2387         |
|    total_timesteps      | 397312       |
| train/                  |              |
|    approx_kl            | 0.0034683545 |
|    clip_fraction        | 0.0214       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.231       |
|    explained_variance   | 0.000109     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00165      |
|    n_updates            | 3870         |
|    policy_gradient_loss | -0.00215     |
|    reward               | 0.0          |
|    value_loss           | 0.0287       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.776         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 389           |
|    time_elapsed         | 2393          |
|    total_timesteps      | 398336        |
| train/                  |               |
|    approx_kl            | 0.00024479697 |
|    clip_fraction        | 0.00244       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.218        |
|    explained_variance   | 0.000413      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00188       |
|    n_updates            | 3880          |
|    policy_gradient_loss | -0.000273     |
|    reward               | 0.0042772107  |
|    value_loss           | 0.0231        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.77          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 390           |
|    time_elapsed         | 2400          |
|    total_timesteps      | 399360        |
| train/                  |               |
|    approx_kl            | 0.00036482653 |
|    clip_fraction        | 0.00781       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.194        |
|    explained_variance   | 0.000769      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00489       |
|    n_updates            | 3890          |
|    policy_gradient_loss | -0.0019       |
|    reward               | 0.003506179   |
|    value_loss           | 0.0254        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.762         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 391           |
|    time_elapsed         | 2406          |
|    total_timesteps      | 400384        |
| train/                  |               |
|    approx_kl            | 0.00057839276 |
|    clip_fraction        | 0.015         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.19         |
|    explained_variance   | 0.00223       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00857       |
|    n_updates            | 3900          |
|    policy_gradient_loss | -0.00225      |
|    reward               | 0.06700383    |
|    value_loss           | 0.0224        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.761         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 392           |
|    time_elapsed         | 2412          |
|    total_timesteps      | 401408        |
| train/                  |               |
|    approx_kl            | 0.00065283675 |
|    clip_fraction        | 0.0102        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.181        |
|    explained_variance   | 0.00103       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.007         |
|    n_updates            | 3910          |
|    policy_gradient_loss | -0.00158      |
|    reward               | -0.07969136   |
|    value_loss           | 0.0239        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.763        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 393          |
|    time_elapsed         | 2418         |
|    total_timesteps      | 402432       |
| train/                  |              |
|    approx_kl            | 0.0008328557 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.192       |
|    explained_variance   | 0.000811     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00505     |
|    n_updates            | 3920         |
|    policy_gradient_loss | -0.00144     |
|    reward               | -0.07119728  |
|    value_loss           | 0.0305       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.755         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 394           |
|    time_elapsed         | 2424          |
|    total_timesteps      | 403456        |
| train/                  |               |
|    approx_kl            | 0.00022649078 |
|    clip_fraction        | 0.0083        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.198        |
|    explained_variance   | 0.000583      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00807       |
|    n_updates            | 3930          |
|    policy_gradient_loss | 7.09e-05      |
|    reward               | 0.0197064     |
|    value_loss           | 0.0288        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.755        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 395          |
|    time_elapsed         | 2430         |
|    total_timesteps      | 404480       |
| train/                  |              |
|    approx_kl            | 0.0010525655 |
|    clip_fraction        | 0.013        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.000622     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000648     |
|    n_updates            | 3940         |
|    policy_gradient_loss | 0.000787     |
|    reward               | -0.031965826 |
|    value_loss           | 0.0234       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.753        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 396          |
|    time_elapsed         | 2436         |
|    total_timesteps      | 405504       |
| train/                  |              |
|    approx_kl            | 0.0010690931 |
|    clip_fraction        | 0.0273       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.28        |
|    explained_variance   | 0.000812     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00783     |
|    n_updates            | 3950         |
|    policy_gradient_loss | 8.37e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.027        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.753        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 397          |
|    time_elapsed         | 2442         |
|    total_timesteps      | 406528       |
| train/                  |              |
|    approx_kl            | 0.0013828594 |
|    clip_fraction        | 0.0303       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.332       |
|    explained_variance   | 0.000901     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00504      |
|    n_updates            | 3960         |
|    policy_gradient_loss | -0.000634    |
|    reward               | 0.007660883  |
|    value_loss           | 0.0228       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.748        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 398          |
|    time_elapsed         | 2448         |
|    total_timesteps      | 407552       |
| train/                  |              |
|    approx_kl            | 0.0014051546 |
|    clip_fraction        | 0.021        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.315       |
|    explained_variance   | 0.000848     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.011       |
|    n_updates            | 3970         |
|    policy_gradient_loss | -0.00163     |
|    reward               | 0.0          |
|    value_loss           | 0.0261       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.745       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 399         |
|    time_elapsed         | 2454        |
|    total_timesteps      | 408576      |
| train/                  |             |
|    approx_kl            | 0.000590654 |
|    clip_fraction        | 0.0152      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.331      |
|    explained_variance   | 0.000645    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00188    |
|    n_updates            | 3980        |
|    policy_gradient_loss | -0.000561   |
|    reward               | 0.010853228 |
|    value_loss           | 0.0203      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.739        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 400          |
|    time_elapsed         | 2460         |
|    total_timesteps      | 409600       |
| train/                  |              |
|    approx_kl            | 0.0011810753 |
|    clip_fraction        | 0.0152       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.322       |
|    explained_variance   | 5.32e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0197      |
|    n_updates            | 3990         |
|    policy_gradient_loss | -0.00171     |
|    reward               | 0.0070158266 |
|    value_loss           | 0.0187       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.748        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 401          |
|    time_elapsed         | 2466         |
|    total_timesteps      | 410624       |
| train/                  |              |
|    approx_kl            | 0.0016191655 |
|    clip_fraction        | 0.0182       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.282       |
|    explained_variance   | 0.000185     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00255     |
|    n_updates            | 4000         |
|    policy_gradient_loss | -0.00238     |
|    reward               | 0.007844996  |
|    value_loss           | 0.0168       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 402          |
|    time_elapsed         | 2472         |
|    total_timesteps      | 411648       |
| train/                  |              |
|    approx_kl            | 0.0014542106 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.000548     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000857    |
|    n_updates            | 4010         |
|    policy_gradient_loss | -0.00255     |
|    reward               | 0.028251795  |
|    value_loss           | 0.0205       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 403          |
|    time_elapsed         | 2478         |
|    total_timesteps      | 412672       |
| train/                  |              |
|    approx_kl            | 0.0008248387 |
|    clip_fraction        | 0.00518      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.245       |
|    explained_variance   | -0.000209    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000339     |
|    n_updates            | 4020         |
|    policy_gradient_loss | -5.25e-05    |
|    reward               | 0.031071758  |
|    value_loss           | 0.0273       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 404          |
|    time_elapsed         | 2484         |
|    total_timesteps      | 413696       |
| train/                  |              |
|    approx_kl            | 0.0012649419 |
|    clip_fraction        | 0.0175       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | 0.00062      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00144     |
|    n_updates            | 4030         |
|    policy_gradient_loss | -0.00311     |
|    reward               | 0.0          |
|    value_loss           | 0.0206       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 405          |
|    time_elapsed         | 2490         |
|    total_timesteps      | 414720       |
| train/                  |              |
|    approx_kl            | 0.0010947307 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.21        |
|    explained_variance   | 0.00094      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0111       |
|    n_updates            | 4040         |
|    policy_gradient_loss | -0.00208     |
|    reward               | 0.0          |
|    value_loss           | 0.0255       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.744        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 406          |
|    time_elapsed         | 2496         |
|    total_timesteps      | 415744       |
| train/                  |              |
|    approx_kl            | 0.000598001  |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.18        |
|    explained_variance   | 0.000871     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000503     |
|    n_updates            | 4050         |
|    policy_gradient_loss | -0.00306     |
|    reward               | 0.0067127976 |
|    value_loss           | 0.0238       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 407          |
|    time_elapsed         | 2502         |
|    total_timesteps      | 416768       |
| train/                  |              |
|    approx_kl            | 0.0005773346 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.199       |
|    explained_variance   | 0.000479     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.021        |
|    n_updates            | 4060         |
|    policy_gradient_loss | -0.000841    |
|    reward               | 0.12175085   |
|    value_loss           | 0.0496       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.767         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 409           |
|    time_elapsed         | 2514          |
|    total_timesteps      | 418816        |
| train/                  |               |
|    approx_kl            | 0.00030018203 |
|    clip_fraction        | 0.00674       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.208        |
|    explained_variance   | 0.000562      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00537      |
|    n_updates            | 4080          |
|    policy_gradient_loss | -0.0013       |
|    reward               | 0.005688386   |
|    value_loss           | 0.0256        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.761        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 410          |
|    time_elapsed         | 2520         |
|    total_timesteps      | 419840       |
| train/                  |              |
|    approx_kl            | 0.0011362777 |
|    clip_fraction        | 0.0134       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.21        |
|    explained_variance   | 0.000845     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00518     |
|    n_updates            | 4090         |
|    policy_gradient_loss | -0.000913    |
|    reward               | 0.0          |
|    value_loss           | 0.0224       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.768        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 411          |
|    time_elapsed         | 2526         |
|    total_timesteps      | 420864       |
| train/                  |              |
|    approx_kl            | 0.0011085871 |
|    clip_fraction        | 0.0239       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.00183      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00169     |
|    n_updates            | 4100         |
|    policy_gradient_loss | -0.00256     |
|    reward               | 0.036410518  |
|    value_loss           | 0.0225       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.765         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 412           |
|    time_elapsed         | 2532          |
|    total_timesteps      | 421888        |
| train/                  |               |
|    approx_kl            | 0.00040511606 |
|    clip_fraction        | 0.00928       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.224        |
|    explained_variance   | -1.24e-05     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000732      |
|    n_updates            | 4110          |
|    policy_gradient_loss | -0.000624     |
|    reward               | -0.08786611   |
|    value_loss           | 0.0225        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.776        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 413          |
|    time_elapsed         | 2538         |
|    total_timesteps      | 422912       |
| train/                  |              |
|    approx_kl            | 0.0005776948 |
|    clip_fraction        | 0.00879      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.231       |
|    explained_variance   | 0.000694     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00161      |
|    n_updates            | 4120         |
|    policy_gradient_loss | 5.7e-05      |
|    reward               | -0.039751463 |
|    value_loss           | 0.0203       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.782        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 414          |
|    time_elapsed         | 2543         |
|    total_timesteps      | 423936       |
| train/                  |              |
|    approx_kl            | 0.0008370136 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.236       |
|    explained_variance   | 0.000766     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00517      |
|    n_updates            | 4130         |
|    policy_gradient_loss | -0.00093     |
|    reward               | 0.0          |
|    value_loss           | 0.0294       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.78          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 415           |
|    time_elapsed         | 2550          |
|    total_timesteps      | 424960        |
| train/                  |               |
|    approx_kl            | 0.00021358149 |
|    clip_fraction        | 0.0083        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.256        |
|    explained_variance   | 0.00119       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0023       |
|    n_updates            | 4140          |
|    policy_gradient_loss | 0.000906      |
|    reward               | 0.0           |
|    value_loss           | 0.016         |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.781       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 416         |
|    time_elapsed         | 2556        |
|    total_timesteps      | 425984      |
| train/                  |             |
|    approx_kl            | 0.000609421 |
|    clip_fraction        | 0.00742     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.238      |
|    explained_variance   | 0.000476    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00548    |
|    n_updates            | 4150        |
|    policy_gradient_loss | -0.000904   |
|    reward               | 0.0         |
|    value_loss           | 0.0179      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.781        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 417          |
|    time_elapsed         | 2562         |
|    total_timesteps      | 427008       |
| train/                  |              |
|    approx_kl            | 0.0012183621 |
|    clip_fraction        | 0.016        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.215       |
|    explained_variance   | 0.000603     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0095      |
|    n_updates            | 4160         |
|    policy_gradient_loss | -0.00275     |
|    reward               | 0.0          |
|    value_loss           | 0.0274       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.778        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 418          |
|    time_elapsed         | 2569         |
|    total_timesteps      | 428032       |
| train/                  |              |
|    approx_kl            | 0.0015546253 |
|    clip_fraction        | 0.0215       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.255       |
|    explained_variance   | 0.000363     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.016        |
|    n_updates            | 4170         |
|    policy_gradient_loss | -0.000137    |
|    reward               | 0.0          |
|    value_loss           | 0.0437       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.781        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 419          |
|    time_elapsed         | 2574         |
|    total_timesteps      | 429056       |
| train/                  |              |
|    approx_kl            | 0.0003976609 |
|    clip_fraction        | 0.0132       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.29        |
|    explained_variance   | 0.00135      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0021      |
|    n_updates            | 4180         |
|    policy_gradient_loss | 0.00027      |
|    reward               | 0.0          |
|    value_loss           | 0.0275       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.785       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 420         |
|    time_elapsed         | 2581        |
|    total_timesteps      | 430080      |
| train/                  |             |
|    approx_kl            | 0.002130276 |
|    clip_fraction        | 0.0338      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.245      |
|    explained_variance   | 0.000763    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00881     |
|    n_updates            | 4190        |
|    policy_gradient_loss | -0.00529    |
|    reward               | 0.0         |
|    value_loss           | 0.0179      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.767       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 421         |
|    time_elapsed         | 2587        |
|    total_timesteps      | 431104      |
| train/                  |             |
|    approx_kl            | 0.001030925 |
|    clip_fraction        | 0.0102      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.243      |
|    explained_variance   | 0.00302     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00723     |
|    n_updates            | 4200        |
|    policy_gradient_loss | 2.93e-05    |
|    reward               | 0.0         |
|    value_loss           | 0.0271      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 422          |
|    time_elapsed         | 2592         |
|    total_timesteps      | 432128       |
| train/                  |              |
|    approx_kl            | 0.0012069454 |
|    clip_fraction        | 0.0175       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.000393     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0103       |
|    n_updates            | 4210         |
|    policy_gradient_loss | -0.00166     |
|    reward               | 0.0          |
|    value_loss           | 0.0261       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.768        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 423          |
|    time_elapsed         | 2599         |
|    total_timesteps      | 433152       |
| train/                  |              |
|    approx_kl            | 0.000989568  |
|    clip_fraction        | 0.0084       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 0.00092      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00636      |
|    n_updates            | 4220         |
|    policy_gradient_loss | -0.000194    |
|    reward               | 0.0046740416 |
|    value_loss           | 0.0242       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.766         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 424           |
|    time_elapsed         | 2605          |
|    total_timesteps      | 434176        |
| train/                  |               |
|    approx_kl            | 0.00042298448 |
|    clip_fraction        | 0.0237        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.236        |
|    explained_variance   | 0.000171      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000565      |
|    n_updates            | 4230          |
|    policy_gradient_loss | -0.00134      |
|    reward               | 0.0           |
|    value_loss           | 0.0256        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.765        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 425          |
|    time_elapsed         | 2610         |
|    total_timesteps      | 435200       |
| train/                  |              |
|    approx_kl            | 0.0006895574 |
|    clip_fraction        | 0.00957      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.194       |
|    explained_variance   | -1.11e-05    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000426    |
|    n_updates            | 4240         |
|    policy_gradient_loss | -0.00175     |
|    reward               | 0.011999352  |
|    value_loss           | 0.0242       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.768        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 426          |
|    time_elapsed         | 2616         |
|    total_timesteps      | 436224       |
| train/                  |              |
|    approx_kl            | 0.0005757168 |
|    clip_fraction        | 0.0164       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.189       |
|    explained_variance   | 0.000647     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00401     |
|    n_updates            | 4250         |
|    policy_gradient_loss | -0.00201     |
|    reward               | 0.018389136  |
|    value_loss           | 0.0243       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.769         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 427           |
|    time_elapsed         | 2623          |
|    total_timesteps      | 437248        |
| train/                  |               |
|    approx_kl            | 0.00062785065 |
|    clip_fraction        | 0.0135        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.186        |
|    explained_variance   | 0.000182      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0173        |
|    n_updates            | 4260          |
|    policy_gradient_loss | -0.00103      |
|    reward               | 0.00078031654 |
|    value_loss           | 0.0383        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.777        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 430          |
|    time_elapsed         | 2640         |
|    total_timesteps      | 440320       |
| train/                  |              |
|    approx_kl            | 0.0002216252 |
|    clip_fraction        | 0.00293      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.176       |
|    explained_variance   | 0.000431     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00692      |
|    n_updates            | 4290         |
|    policy_gradient_loss | 6.41e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.0325       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.778       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 431         |
|    time_elapsed         | 2646        |
|    total_timesteps      | 441344      |
| train/                  |             |
|    approx_kl            | 0.000799938 |
|    clip_fraction        | 0.0196      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.173      |
|    explained_variance   | -4.82e-05   |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00239     |
|    n_updates            | 4300        |
|    policy_gradient_loss | -0.00189    |
|    reward               | 0.015477019 |
|    value_loss           | 0.0206      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.776        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 432          |
|    time_elapsed         | 2652         |
|    total_timesteps      | 442368       |
| train/                  |              |
|    approx_kl            | 0.0004886123 |
|    clip_fraction        | 0.0114       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.188       |
|    explained_variance   | 0.000727     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00585      |
|    n_updates            | 4310         |
|    policy_gradient_loss | 5.36e-05     |
|    reward               | 0.013968519  |
|    value_loss           | 0.0281       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.778         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 433           |
|    time_elapsed         | 2658          |
|    total_timesteps      | 443392        |
| train/                  |               |
|    approx_kl            | 0.00032059004 |
|    clip_fraction        | 0.00889       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.205        |
|    explained_variance   | 0.000621      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00486       |
|    n_updates            | 4320          |
|    policy_gradient_loss | 0.000914      |
|    reward               | 0.010533961   |
|    value_loss           | 0.031         |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.774       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 434         |
|    time_elapsed         | 2665        |
|    total_timesteps      | 444416      |
| train/                  |             |
|    approx_kl            | 0.017094765 |
|    clip_fraction        | 0.0241      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.242      |
|    explained_variance   | 0.000774    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00447     |
|    n_updates            | 4330        |
|    policy_gradient_loss | -0.00323    |
|    reward               | 0.013043847 |
|    value_loss           | 0.0332      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.774        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 435          |
|    time_elapsed         | 2671         |
|    total_timesteps      | 445440       |
| train/                  |              |
|    approx_kl            | 0.0017081874 |
|    clip_fraction        | 0.0282       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.278       |
|    explained_variance   | 0.000736     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00757      |
|    n_updates            | 4340         |
|    policy_gradient_loss | -0.00388     |
|    reward               | 0.03420829   |
|    value_loss           | 0.02         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 436          |
|    time_elapsed         | 2677         |
|    total_timesteps      | 446464       |
| train/                  |              |
|    approx_kl            | 0.0009043441 |
|    clip_fraction        | 0.00918      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.234       |
|    explained_variance   | 0.000314     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00199      |
|    n_updates            | 4350         |
|    policy_gradient_loss | -0.00144     |
|    reward               | -0.053259436 |
|    value_loss           | 0.0306       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.766         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 437           |
|    time_elapsed         | 2683          |
|    total_timesteps      | 447488        |
| train/                  |               |
|    approx_kl            | 0.00048547448 |
|    clip_fraction        | 0.00801       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.222        |
|    explained_variance   | 0.000567      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0133        |
|    n_updates            | 4360          |
|    policy_gradient_loss | -0.00174      |
|    reward               | -0.017036019  |
|    value_loss           | 0.0266        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.768       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 438         |
|    time_elapsed         | 2690        |
|    total_timesteps      | 448512      |
| train/                  |             |
|    approx_kl            | 0.002321236 |
|    clip_fraction        | 0.00986     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.276      |
|    explained_variance   | -3.66e-05   |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00769     |
|    n_updates            | 4370        |
|    policy_gradient_loss | 0.000381    |
|    reward               | 0.01669854  |
|    value_loss           | 0.0239      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.767        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 439          |
|    time_elapsed         | 2696         |
|    total_timesteps      | 449536       |
| train/                  |              |
|    approx_kl            | 0.0012294843 |
|    clip_fraction        | 0.0111       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.293       |
|    explained_variance   | 0.000454     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0131       |
|    n_updates            | 4380         |
|    policy_gradient_loss | -0.00147     |
|    reward               | -0.07361134  |
|    value_loss           | 0.0242       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.761         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 440           |
|    time_elapsed         | 2702          |
|    total_timesteps      | 450560        |
| train/                  |               |
|    approx_kl            | 0.00045311602 |
|    clip_fraction        | 0.0258        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.336        |
|    explained_variance   | 0.000535      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00726      |
|    n_updates            | 4390          |
|    policy_gradient_loss | 0.000443      |
|    reward               | 0.0           |
|    value_loss           | 0.0239        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.759        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 441          |
|    time_elapsed         | 2708         |
|    total_timesteps      | 451584       |
| train/                  |              |
|    approx_kl            | 0.0015790306 |
|    clip_fraction        | 0.018        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.345       |
|    explained_variance   | 0.00131      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00618      |
|    n_updates            | 4400         |
|    policy_gradient_loss | -0.00147     |
|    reward               | 0.0024057433 |
|    value_loss           | 0.018        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.763        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 442          |
|    time_elapsed         | 2715         |
|    total_timesteps      | 452608       |
| train/                  |              |
|    approx_kl            | 0.0021753293 |
|    clip_fraction        | 0.0316       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.298       |
|    explained_variance   | 0.000748     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0106      |
|    n_updates            | 4410         |
|    policy_gradient_loss | -0.0047      |
|    reward               | 0.0          |
|    value_loss           | 0.019        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.76          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 443           |
|    time_elapsed         | 2721          |
|    total_timesteps      | 453632        |
| train/                  |               |
|    approx_kl            | 0.00090638595 |
|    clip_fraction        | 0.0141        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.276        |
|    explained_variance   | 7.76e-05      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00901      |
|    n_updates            | 4420          |
|    policy_gradient_loss | -0.00148      |
|    reward               | 0.05419063    |
|    value_loss           | 0.0218        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.757        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 444          |
|    time_elapsed         | 2728         |
|    total_timesteps      | 454656       |
| train/                  |              |
|    approx_kl            | 0.001054064  |
|    clip_fraction        | 0.0121       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.262       |
|    explained_variance   | 0.000515     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00622      |
|    n_updates            | 4430         |
|    policy_gradient_loss | -0.000721    |
|    reward               | -0.023508374 |
|    value_loss           | 0.0198       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.76         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 445          |
|    time_elapsed         | 2734         |
|    total_timesteps      | 455680       |
| train/                  |              |
|    approx_kl            | 0.0012718788 |
|    clip_fraction        | 0.0209       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.232       |
|    explained_variance   | 0.000911     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00742     |
|    n_updates            | 4440         |
|    policy_gradient_loss | -0.00339     |
|    reward               | -0.092678994 |
|    value_loss           | 0.021        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.766         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 446           |
|    time_elapsed         | 2740          |
|    total_timesteps      | 456704        |
| train/                  |               |
|    approx_kl            | 0.00069048273 |
|    clip_fraction        | 0.00684       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.202        |
|    explained_variance   | 0.000822      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0104        |
|    n_updates            | 4450          |
|    policy_gradient_loss | -0.00149      |
|    reward               | 0.10203295    |
|    value_loss           | 0.0271        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.769         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 447           |
|    time_elapsed         | 2746          |
|    total_timesteps      | 457728        |
| train/                  |               |
|    approx_kl            | 0.00055771106 |
|    clip_fraction        | 0.0147        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.196        |
|    explained_variance   | 0.00217       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000496      |
|    n_updates            | 4460          |
|    policy_gradient_loss | -0.00114      |
|    reward               | 0.005503153   |
|    value_loss           | 0.023         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.771        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 448          |
|    time_elapsed         | 2752         |
|    total_timesteps      | 458752       |
| train/                  |              |
|    approx_kl            | 0.0006914383 |
|    clip_fraction        | 0.0126       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.173       |
|    explained_variance   | 0.00084      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00789      |
|    n_updates            | 4470         |
|    policy_gradient_loss | -0.00169     |
|    reward               | 0.0038973773 |
|    value_loss           | 0.0221       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.767         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 449           |
|    time_elapsed         | 2758          |
|    total_timesteps      | 459776        |
| train/                  |               |
|    approx_kl            | 0.00046077272 |
|    clip_fraction        | 0.00273       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.16         |
|    explained_variance   | 0.000689      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0168        |
|    n_updates            | 4480          |
|    policy_gradient_loss | -0.000162     |
|    reward               | 0.025449207   |
|    value_loss           | 0.0328        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.78         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 452          |
|    time_elapsed         | 2776         |
|    total_timesteps      | 462848       |
| train/                  |              |
|    approx_kl            | 0.0013720951 |
|    clip_fraction        | 0.0189       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.175       |
|    explained_variance   | 0.000768     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0059       |
|    n_updates            | 4510         |
|    policy_gradient_loss | -0.000557    |
|    reward               | 0.08456668   |
|    value_loss           | 0.0243       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.788         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 453           |
|    time_elapsed         | 2783          |
|    total_timesteps      | 463872        |
| train/                  |               |
|    approx_kl            | 0.00067249825 |
|    clip_fraction        | 0.0149        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.186        |
|    explained_variance   | 0.000317      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0024        |
|    n_updates            | 4520          |
|    policy_gradient_loss | -0.00168      |
|    reward               | 0.023776548   |
|    value_loss           | 0.0242        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.792        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 454          |
|    time_elapsed         | 2789         |
|    total_timesteps      | 464896       |
| train/                  |              |
|    approx_kl            | 0.0006470317 |
|    clip_fraction        | 0.00967      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.178       |
|    explained_variance   | 0.00299      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00886      |
|    n_updates            | 4530         |
|    policy_gradient_loss | -0.00108     |
|    reward               | 0.058537953  |
|    value_loss           | 0.0265       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.799         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 455           |
|    time_elapsed         | 2795          |
|    total_timesteps      | 465920        |
| train/                  |               |
|    approx_kl            | 0.00018715509 |
|    clip_fraction        | 0.004         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.162        |
|    explained_variance   | 0.000683      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0068        |
|    n_updates            | 4540          |
|    policy_gradient_loss | -0.000618     |
|    reward               | 0.016507057   |
|    value_loss           | 0.0209        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.799        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 456          |
|    time_elapsed         | 2801         |
|    total_timesteps      | 466944       |
| train/                  |              |
|    approx_kl            | 0.0009041556 |
|    clip_fraction        | 0.0141       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.167       |
|    explained_variance   | 0.000328     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0152       |
|    n_updates            | 4550         |
|    policy_gradient_loss | -0.000639    |
|    reward               | -0.028103022 |
|    value_loss           | 0.0308       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.804        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 457          |
|    time_elapsed         | 2807         |
|    total_timesteps      | 467968       |
| train/                  |              |
|    approx_kl            | 0.0012396472 |
|    clip_fraction        | 0.0114       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.184       |
|    explained_variance   | 0.000878     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00646      |
|    n_updates            | 4560         |
|    policy_gradient_loss | 0.000291     |
|    reward               | 0.0          |
|    value_loss           | 0.0285       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.799         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 458           |
|    time_elapsed         | 2813          |
|    total_timesteps      | 468992        |
| train/                  |               |
|    approx_kl            | 0.00060188083 |
|    clip_fraction        | 0.00703       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.191        |
|    explained_variance   | -0.000219     |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0049       |
|    n_updates            | 4570          |
|    policy_gradient_loss | -0.000643     |
|    reward               | 0.029612225   |
|    value_loss           | 0.0242        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.802       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 459         |
|    time_elapsed         | 2820        |
|    total_timesteps      | 470016      |
| train/                  |             |
|    approx_kl            | 0.003380238 |
|    clip_fraction        | 0.0235      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.232      |
|    explained_variance   | 0.000483    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00186     |
|    n_updates            | 4580        |
|    policy_gradient_loss | -0.000644   |
|    reward               | 0.0         |
|    value_loss           | 0.0264      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.813        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 460          |
|    time_elapsed         | 2825         |
|    total_timesteps      | 471040       |
| train/                  |              |
|    approx_kl            | 0.0006884994 |
|    clip_fraction        | 0.00996      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.226       |
|    explained_variance   | 0.000594     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000433     |
|    n_updates            | 4590         |
|    policy_gradient_loss | -0.00101     |
|    reward               | -0.013158678 |
|    value_loss           | 0.0188       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.816       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 461         |
|    time_elapsed         | 2831        |
|    total_timesteps      | 472064      |
| train/                  |             |
|    approx_kl            | 0.000322812 |
|    clip_fraction        | 0.00371     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.198      |
|    explained_variance   | -3.58e-05   |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0165      |
|    n_updates            | 4600        |
|    policy_gradient_loss | -0.000859   |
|    reward               | 0.0         |
|    value_loss           | 0.0285      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.825        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 462          |
|    time_elapsed         | 2837         |
|    total_timesteps      | 473088       |
| train/                  |              |
|    approx_kl            | 0.0006361365 |
|    clip_fraction        | 0.0105       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.000168     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00801      |
|    n_updates            | 4610         |
|    policy_gradient_loss | -0.000549    |
|    reward               | 0.010629117  |
|    value_loss           | 0.0384       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.823        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 463          |
|    time_elapsed         | 2844         |
|    total_timesteps      | 474112       |
| train/                  |              |
|    approx_kl            | 0.0007914555 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.000856     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00621      |
|    n_updates            | 4620         |
|    policy_gradient_loss | 0.000249     |
|    reward               | 0.011540491  |
|    value_loss           | 0.0297       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.815        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 464          |
|    time_elapsed         | 2850         |
|    total_timesteps      | 475136       |
| train/                  |              |
|    approx_kl            | 0.0006985292 |
|    clip_fraction        | 0.0167       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | 0.000177     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00351      |
|    n_updates            | 4630         |
|    policy_gradient_loss | 0.000134     |
|    reward               | 0.017037546  |
|    value_loss           | 0.0271       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.822         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 465           |
|    time_elapsed         | 2856          |
|    total_timesteps      | 476160        |
| train/                  |               |
|    approx_kl            | 0.00030964497 |
|    clip_fraction        | 0.0123        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.26         |
|    explained_variance   | 0.00087       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0129        |
|    n_updates            | 4640          |
|    policy_gradient_loss | 0.000943      |
|    reward               | 0.0016611012  |
|    value_loss           | 0.0267        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.823         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 466           |
|    time_elapsed         | 2862          |
|    total_timesteps      | 477184        |
| train/                  |               |
|    approx_kl            | 0.00091446575 |
|    clip_fraction        | 0.0119        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.232        |
|    explained_variance   | 0.000249      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00494      |
|    n_updates            | 4650          |
|    policy_gradient_loss | -0.00187      |
|    reward               | 0.005421972   |
|    value_loss           | 0.0226        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.819         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 467           |
|    time_elapsed         | 2868          |
|    total_timesteps      | 478208        |
| train/                  |               |
|    approx_kl            | 0.00031946885 |
|    clip_fraction        | 0.00361       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.215        |
|    explained_variance   | 0.00101       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0124        |
|    n_updates            | 4660          |
|    policy_gradient_loss | -0.00065      |
|    reward               | 0.041232355   |
|    value_loss           | 0.0283        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.819        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 468          |
|    time_elapsed         | 2875         |
|    total_timesteps      | 479232       |
| train/                  |              |
|    approx_kl            | 0.0009765944 |
|    clip_fraction        | 0.0245       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.256       |
|    explained_variance   | 0.000465     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00285      |
|    n_updates            | 4670         |
|    policy_gradient_loss | -0.000468    |
|    reward               | 0.0          |
|    value_loss           | 0.0356       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 469          |
|    time_elapsed         | 2881         |
|    total_timesteps      | 480256       |
| train/                  |              |
|    approx_kl            | 0.0007695737 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.259       |
|    explained_variance   | 0.00332      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0123       |
|    n_updates            | 4680         |
|    policy_gradient_loss | -0.00136     |
|    reward               | 0.013108774  |
|    value_loss           | 0.0219       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.81         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 470          |
|    time_elapsed         | 2887         |
|    total_timesteps      | 481280       |
| train/                  |              |
|    approx_kl            | 0.0017353839 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.226       |
|    explained_variance   | 0.00187      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000434     |
|    n_updates            | 4690         |
|    policy_gradient_loss | -0.0054      |
|    reward               | 0.0012553931 |
|    value_loss           | 0.0171       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.803         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 471           |
|    time_elapsed         | 2893          |
|    total_timesteps      | 482304        |
| train/                  |               |
|    approx_kl            | 0.00061283243 |
|    clip_fraction        | 0.0136        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.22         |
|    explained_variance   | -0.000925     |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00858      |
|    n_updates            | 4700          |
|    policy_gradient_loss | -0.00145      |
|    reward               | -0.008539434  |
|    value_loss           | 0.02          |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 473          |
|    time_elapsed         | 2905         |
|    total_timesteps      | 484352       |
| train/                  |              |
|    approx_kl            | 0.0010384722 |
|    clip_fraction        | 0.0125       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.00259      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0118       |
|    n_updates            | 4720         |
|    policy_gradient_loss | -0.0015      |
|    reward               | 0.040950242  |
|    value_loss           | 0.0201       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.792        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 474          |
|    time_elapsed         | 2912         |
|    total_timesteps      | 485376       |
| train/                  |              |
|    approx_kl            | 0.0006609731 |
|    clip_fraction        | 0.0108       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.214       |
|    explained_variance   | 0.000633     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0111       |
|    n_updates            | 4730         |
|    policy_gradient_loss | 0.00084      |
|    reward               | 0.0839686    |
|    value_loss           | 0.0246       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.796         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 475           |
|    time_elapsed         | 2917          |
|    total_timesteps      | 486400        |
| train/                  |               |
|    approx_kl            | 0.00041518366 |
|    clip_fraction        | 0.00635       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.237        |
|    explained_variance   | 0.00065       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00493       |
|    n_updates            | 4740          |
|    policy_gradient_loss | 0.000376      |
|    reward               | 0.006549228   |
|    value_loss           | 0.031         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.802         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 476           |
|    time_elapsed         | 2924          |
|    total_timesteps      | 487424        |
| train/                  |               |
|    approx_kl            | 0.00035272486 |
|    clip_fraction        | 0.0124        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.257        |
|    explained_variance   | -0.00018      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00374       |
|    n_updates            | 4750          |
|    policy_gradient_loss | -0.000413     |
|    reward               | 0.0           |
|    value_loss           | 0.0261        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.792         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 477           |
|    time_elapsed         | 2930          |
|    total_timesteps      | 488448        |
| train/                  |               |
|    approx_kl            | 0.00065526424 |
|    clip_fraction        | 0.028         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.298        |
|    explained_variance   | 0.000724      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00121      |
|    n_updates            | 4760          |
|    policy_gradient_loss | -0.000408     |
|    reward               | 0.0           |
|    value_loss           | 0.0191        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.788        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 478          |
|    time_elapsed         | 2936         |
|    total_timesteps      | 489472       |
| train/                  |              |
|    approx_kl            | 0.0010196134 |
|    clip_fraction        | 0.022        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.321       |
|    explained_variance   | 0.000896     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000691     |
|    n_updates            | 4770         |
|    policy_gradient_loss | 0.000164     |
|    reward               | -0.020724837 |
|    value_loss           | 0.0215       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.787        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 479          |
|    time_elapsed         | 2942         |
|    total_timesteps      | 490496       |
| train/                  |              |
|    approx_kl            | 0.0013660064 |
|    clip_fraction        | 0.0181       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | 0.000582     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0062      |
|    n_updates            | 4780         |
|    policy_gradient_loss | -0.00331     |
|    reward               | 0.03620288   |
|    value_loss           | 0.0201       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.784       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 480         |
|    time_elapsed         | 2949        |
|    total_timesteps      | 491520      |
| train/                  |             |
|    approx_kl            | 0.001438804 |
|    clip_fraction        | 0.0277      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.268      |
|    explained_variance   | 0.00053     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00217     |
|    n_updates            | 4790        |
|    policy_gradient_loss | -0.00268    |
|    reward               | 0.008048596 |
|    value_loss           | 0.0269      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.781        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 481          |
|    time_elapsed         | 2956         |
|    total_timesteps      | 492544       |
| train/                  |              |
|    approx_kl            | 0.0015017053 |
|    clip_fraction        | 0.0148       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.26        |
|    explained_variance   | 0.000305     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00172     |
|    n_updates            | 4800         |
|    policy_gradient_loss | -0.0017      |
|    reward               | 0.0076378463 |
|    value_loss           | 0.0209       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.787         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 482           |
|    time_elapsed         | 2962          |
|    total_timesteps      | 493568        |
| train/                  |               |
|    approx_kl            | 0.00035473582 |
|    clip_fraction        | 0.00781       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.267        |
|    explained_variance   | 0.00151       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00228      |
|    n_updates            | 4810          |
|    policy_gradient_loss | 0.000781      |
|    reward               | 0.008011902   |
|    value_loss           | 0.0287        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.784         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 483           |
|    time_elapsed         | 2968          |
|    total_timesteps      | 494592        |
| train/                  |               |
|    approx_kl            | 0.00090083934 |
|    clip_fraction        | 0.00234       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.271        |
|    explained_variance   | 0.000511      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0107        |
|    n_updates            | 4820          |
|    policy_gradient_loss | 0.000937      |
|    reward               | -0.047327377  |
|    value_loss           | 0.0249        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.778         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 484           |
|    time_elapsed         | 2974          |
|    total_timesteps      | 495616        |
| train/                  |               |
|    approx_kl            | 0.00087981723 |
|    clip_fraction        | 0.00928       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.266        |
|    explained_variance   | 0.000916      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0029       |
|    n_updates            | 4830          |
|    policy_gradient_loss | -0.000582     |
|    reward               | 0.019053048   |
|    value_loss           | 0.018         |
-------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 0.767      |
| time/                   |            |
|    fps                  | 166        |
|    iterations           | 485        |
|    time_elapsed         | 2980       |
|    total_timesteps      | 496640     |
| train/                  |            |
|    approx_kl            | 0.00151706 |
|    clip_fraction        | 0.0312     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.261     |
|    explained_variance   | 0.00103    |
|    learning_rate        | 0.0002     |
|    loss                 | -0.00822   |
|    n_updates            | 4840       |
|    policy_gradient_loss | -0.00433   |
|    reward               | 0.0        |
|    value_loss           | 0.0175     |
----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.768         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 486           |
|    time_elapsed         | 2986          |
|    total_timesteps      | 497664        |
| train/                  |               |
|    approx_kl            | 0.00079907646 |
|    clip_fraction        | 0.0144        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.225        |
|    explained_variance   | 0.000804      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00455       |
|    n_updates            | 4850          |
|    policy_gradient_loss | -0.00272      |
|    reward               | 0.005648176   |
|    value_loss           | 0.0225        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.77          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 487           |
|    time_elapsed         | 2992          |
|    total_timesteps      | 498688        |
| train/                  |               |
|    approx_kl            | 0.00032770255 |
|    clip_fraction        | 0.00469       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.2          |
|    explained_variance   | 0.000638      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00768       |
|    n_updates            | 4860          |
|    policy_gradient_loss | -0.000975     |
|    reward               | 0.0016122073  |
|    value_loss           | 0.0257        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.769        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 488          |
|    time_elapsed         | 2998         |
|    total_timesteps      | 499712       |
| train/                  |              |
|    approx_kl            | 0.000720169  |
|    clip_fraction        | 0.00859      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.199       |
|    explained_variance   | 0.000578     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000714    |
|    n_updates            | 4870         |
|    policy_gradient_loss | -0.000356    |
|    reward               | -0.016997265 |
|    value_loss           | 0.026        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 489          |
|    time_elapsed         | 3004         |
|    total_timesteps      | 500736       |
| train/                  |              |
|    approx_kl            | 0.0012114289 |
|    clip_fraction        | 0.0115       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.236       |
|    explained_variance   | 0.00171      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00391      |
|    n_updates            | 4880         |
|    policy_gradient_loss | 0.000496     |
|    reward               | 0.0101502165 |
|    value_loss           | 0.0267       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.774        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 490          |
|    time_elapsed         | 3011         |
|    total_timesteps      | 501760       |
| train/                  |              |
|    approx_kl            | 0.0013197048 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | 0.000317     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00799      |
|    n_updates            | 4890         |
|    policy_gradient_loss | -0.00248     |
|    reward               | 0.042215407  |
|    value_loss           | 0.0244       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.773        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 491          |
|    time_elapsed         | 3017         |
|    total_timesteps      | 502784       |
| train/                  |              |
|    approx_kl            | 0.0002609049 |
|    clip_fraction        | 0.00967      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.187       |
|    explained_variance   | -0.000175    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00894      |
|    n_updates            | 4900         |
|    policy_gradient_loss | -0.000866    |
|    reward               | -0.054511867 |
|    value_loss           | 0.0257       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.772         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 492           |
|    time_elapsed         | 3023          |
|    total_timesteps      | 503808        |
| train/                  |               |
|    approx_kl            | 0.00042155915 |
|    clip_fraction        | 0.0083        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.209        |
|    explained_variance   | 0.000942      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00397       |
|    n_updates            | 4910          |
|    policy_gradient_loss | 0.000287      |
|    reward               | 0.12903239    |
|    value_loss           | 0.0303        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.771       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 493         |
|    time_elapsed         | 3029        |
|    total_timesteps      | 504832      |
| train/                  |             |
|    approx_kl            | 0.001018416 |
|    clip_fraction        | 0.00937     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.193      |
|    explained_variance   | 0.0012      |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00161     |
|    n_updates            | 4920        |
|    policy_gradient_loss | -0.00218    |
|    reward               | 0.05310281  |
|    value_loss           | 0.0176      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.775        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 494          |
|    time_elapsed         | 3035         |
|    total_timesteps      | 505856       |
| train/                  |              |
|    approx_kl            | 0.0010051331 |
|    clip_fraction        | 0.0261       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | -7.59e-05    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000704     |
|    n_updates            | 4930         |
|    policy_gradient_loss | -0.00122     |
|    reward               | 0.02444496   |
|    value_loss           | 0.0357       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.771        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 495          |
|    time_elapsed         | 3041         |
|    total_timesteps      | 506880       |
| train/                  |              |
|    approx_kl            | 0.0007425859 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.23        |
|    explained_variance   | 0.000524     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0023       |
|    n_updates            | 4940         |
|    policy_gradient_loss | -0.000935    |
|    reward               | 0.012854377  |
|    value_loss           | 0.021        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.786        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 496          |
|    time_elapsed         | 3046         |
|    total_timesteps      | 507904       |
| train/                  |              |
|    approx_kl            | 0.0011523797 |
|    clip_fraction        | 0.0194       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.235       |
|    explained_variance   | 0.00123      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00958      |
|    n_updates            | 4950         |
|    policy_gradient_loss | -0.00211     |
|    reward               | 0.0028608604 |
|    value_loss           | 0.0265       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.781         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 497           |
|    time_elapsed         | 3053          |
|    total_timesteps      | 508928        |
| train/                  |               |
|    approx_kl            | 0.00041947578 |
|    clip_fraction        | 0.00723       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.204        |
|    explained_variance   | 0.000548      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00492       |
|    n_updates            | 4960          |
|    policy_gradient_loss | -0.00154      |
|    reward               | 0.0           |
|    value_loss           | 0.0253        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.788         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 499           |
|    time_elapsed         | 3065          |
|    total_timesteps      | 510976        |
| train/                  |               |
|    approx_kl            | 0.00021028618 |
|    clip_fraction        | 0.00557       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.257        |
|    explained_variance   | -0.000217     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.012         |
|    n_updates            | 4980          |
|    policy_gradient_loss | 0.000246      |
|    reward               | 0.015961839   |
|    value_loss           | 0.029         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.789        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 500          |
|    time_elapsed         | 3071         |
|    total_timesteps      | 512000       |
| train/                  |              |
|    approx_kl            | 0.0012738165 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.000304     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0049       |
|    n_updates            | 4990         |
|    policy_gradient_loss | -0.00343     |
|    reward               | 0.0          |
|    value_loss           | 0.0244       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.785        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 501          |
|    time_elapsed         | 3076         |
|    total_timesteps      | 513024       |
| train/                  |              |
|    approx_kl            | 0.0010815563 |
|    clip_fraction        | 0.0159       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.173       |
|    explained_variance   | 0.000541     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00347      |
|    n_updates            | 5000         |
|    policy_gradient_loss | -0.00304     |
|    reward               | -0.032989714 |
|    value_loss           | 0.0199       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.785        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 502          |
|    time_elapsed         | 3081         |
|    total_timesteps      | 514048       |
| train/                  |              |
|    approx_kl            | 0.0056637125 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.192       |
|    explained_variance   | 0.000622     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00352      |
|    n_updates            | 5010         |
|    policy_gradient_loss | -0.00226     |
|    reward               | 0.035746913  |
|    value_loss           | 0.0263       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.779        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 503          |
|    time_elapsed         | 3087         |
|    total_timesteps      | 515072       |
| train/                  |              |
|    approx_kl            | 0.0013670786 |
|    clip_fraction        | 0.0177       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.256       |
|    explained_variance   | 0.000371     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0167       |
|    n_updates            | 5020         |
|    policy_gradient_loss | 0.000184     |
|    reward               | 0.0          |
|    value_loss           | 0.023        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.781         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 504           |
|    time_elapsed         | 3093          |
|    total_timesteps      | 516096        |
| train/                  |               |
|    approx_kl            | 0.00096084113 |
|    clip_fraction        | 0.00967       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.237        |
|    explained_variance   | 0.00127       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00542       |
|    n_updates            | 5030          |
|    policy_gradient_loss | -0.0015       |
|    reward               | 0.04245574    |
|    value_loss           | 0.0175        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.776         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 505           |
|    time_elapsed         | 3099          |
|    total_timesteps      | 517120        |
| train/                  |               |
|    approx_kl            | 0.00072936754 |
|    clip_fraction        | 0.0105        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.238        |
|    explained_variance   | 0.00308       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000405      |
|    n_updates            | 5040          |
|    policy_gradient_loss | -0.000631     |
|    reward               | 0.010614188   |
|    value_loss           | 0.0216        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.759        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 506          |
|    time_elapsed         | 3105         |
|    total_timesteps      | 518144       |
| train/                  |              |
|    approx_kl            | 0.0012914061 |
|    clip_fraction        | 0.024        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.246       |
|    explained_variance   | -0.00157     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00176      |
|    n_updates            | 5050         |
|    policy_gradient_loss | -0.00269     |
|    reward               | 0.0          |
|    value_loss           | 0.0257       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 507          |
|    time_elapsed         | 3111         |
|    total_timesteps      | 519168       |
| train/                  |              |
|    approx_kl            | 0.0007309293 |
|    clip_fraction        | 0.00986      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.26        |
|    explained_variance   | 1.92e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00836      |
|    n_updates            | 5060         |
|    policy_gradient_loss | 0.000854     |
|    reward               | 0.0          |
|    value_loss           | 0.0292       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.776        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 508          |
|    time_elapsed         | 3116         |
|    total_timesteps      | 520192       |
| train/                  |              |
|    approx_kl            | 0.0010397498 |
|    clip_fraction        | 0.00752      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.219       |
|    explained_variance   | -0.000131    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00399      |
|    n_updates            | 5070         |
|    policy_gradient_loss | -0.00102     |
|    reward               | 0.007844092  |
|    value_loss           | 0.0247       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.775         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 509           |
|    time_elapsed         | 3122          |
|    total_timesteps      | 521216        |
| train/                  |               |
|    approx_kl            | 0.00014603097 |
|    clip_fraction        | 0.00596       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.222        |
|    explained_variance   | -0.00092      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00235       |
|    n_updates            | 5080          |
|    policy_gradient_loss | -0.000553     |
|    reward               | 0.013617156   |
|    value_loss           | 0.0276        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 510          |
|    time_elapsed         | 3128         |
|    total_timesteps      | 522240       |
| train/                  |              |
|    approx_kl            | 0.0007756886 |
|    clip_fraction        | 0.0124       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.254       |
|    explained_variance   | -0.000335    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00128      |
|    n_updates            | 5090         |
|    policy_gradient_loss | -0.000119    |
|    reward               | 0.008052255  |
|    value_loss           | 0.0266       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 511          |
|    time_elapsed         | 3134         |
|    total_timesteps      | 523264       |
| train/                  |              |
|    approx_kl            | 0.0012380612 |
|    clip_fraction        | 0.00293      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.249       |
|    explained_variance   | 0.000291     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000723    |
|    n_updates            | 5100         |
|    policy_gradient_loss | 3.95e-05     |
|    reward               | 0.0072855013 |
|    value_loss           | 0.0232       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.762        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 513          |
|    time_elapsed         | 3146         |
|    total_timesteps      | 525312       |
| train/                  |              |
|    approx_kl            | 0.0015387437 |
|    clip_fraction        | 0.0246       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.226       |
|    explained_variance   | -0.00011     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00203     |
|    n_updates            | 5120         |
|    policy_gradient_loss | -0.000975    |
|    reward               | 0.0          |
|    value_loss           | 0.0248       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.763       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 514         |
|    time_elapsed         | 3152        |
|    total_timesteps      | 526336      |
| train/                  |             |
|    approx_kl            | 0.000122072 |
|    clip_fraction        | 0.00664     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.2        |
|    explained_variance   | 0.00123     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00221     |
|    n_updates            | 5130        |
|    policy_gradient_loss | -0.00117    |
|    reward               | 0.01095108  |
|    value_loss           | 0.0261      |
-----------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1e+03          |
|    ep_rew_mean          | 0.762          |
| time/                   |                |
|    fps                  | 166            |
|    iterations           | 515            |
|    time_elapsed         | 3157           |
|    total_timesteps      | 527360         |
| train/                  |                |
|    approx_kl            | 0.000101359794 |
|    clip_fraction        | 0.00527        |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.218         |
|    explained_variance   | 0.00067        |
|    learning_rate        | 0.0002         |
|    loss                 | 0.00867        |
|    n_updates            | 5140           |
|    policy_gradient_loss | 0.00107        |
|    reward               | 0.0            |
|    value_loss           | 0.0291         |
--------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.756        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 516          |
|    time_elapsed         | 3163         |
|    total_timesteps      | 528384       |
| train/                  |              |
|    approx_kl            | 0.0011121072 |
|    clip_fraction        | 0.0138       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.189       |
|    explained_variance   | 0.0011       |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00893     |
|    n_updates            | 5150         |
|    policy_gradient_loss | -0.00256     |
|    reward               | -0.027861493 |
|    value_loss           | 0.0179       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.757        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 517          |
|    time_elapsed         | 3169         |
|    total_timesteps      | 529408       |
| train/                  |              |
|    approx_kl            | 0.0019219569 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | 0.000507     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0088       |
|    n_updates            | 5160         |
|    policy_gradient_loss | -0.00124     |
|    reward               | 0.048916247  |
|    value_loss           | 0.0311       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.753         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 518           |
|    time_elapsed         | 3175          |
|    total_timesteps      | 530432        |
| train/                  |               |
|    approx_kl            | 0.00051139126 |
|    clip_fraction        | 0.00684       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.24         |
|    explained_variance   | -6.84e-05     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0181        |
|    n_updates            | 5170          |
|    policy_gradient_loss | 0.000629      |
|    reward               | 0.0           |
|    value_loss           | 0.0213        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.759         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 519           |
|    time_elapsed         | 3182          |
|    total_timesteps      | 531456        |
| train/                  |               |
|    approx_kl            | 0.00037175504 |
|    clip_fraction        | 0.00693       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.251        |
|    explained_variance   | 0.000594      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0041        |
|    n_updates            | 5180          |
|    policy_gradient_loss | 0.000586      |
|    reward               | 0.0           |
|    value_loss           | 0.0201        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.761        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 520          |
|    time_elapsed         | 3187         |
|    total_timesteps      | 532480       |
| train/                  |              |
|    approx_kl            | 0.0007662907 |
|    clip_fraction        | 0.00674      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | -0.00086     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0108      |
|    n_updates            | 5190         |
|    policy_gradient_loss | 0.000524     |
|    reward               | 0.0104822945 |
|    value_loss           | 0.0236       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.755         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 521           |
|    time_elapsed         | 3192          |
|    total_timesteps      | 533504        |
| train/                  |               |
|    approx_kl            | 0.00073535385 |
|    clip_fraction        | 0.00596       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.249        |
|    explained_variance   | -8.94e-05     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00378       |
|    n_updates            | 5200          |
|    policy_gradient_loss | -0.000242     |
|    reward               | 0.0           |
|    value_loss           | 0.0243        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.743         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 522           |
|    time_elapsed         | 3198          |
|    total_timesteps      | 534528        |
| train/                  |               |
|    approx_kl            | 0.00040227256 |
|    clip_fraction        | 0.00615       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.255        |
|    explained_variance   | 0.000766      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00506       |
|    n_updates            | 5210          |
|    policy_gradient_loss | 0.000922      |
|    reward               | 0.0           |
|    value_loss           | 0.0281        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.755       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 523         |
|    time_elapsed         | 3204        |
|    total_timesteps      | 535552      |
| train/                  |             |
|    approx_kl            | 0.001332941 |
|    clip_fraction        | 0.0198      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.255      |
|    explained_variance   | 0.00264     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00448     |
|    n_updates            | 5220        |
|    policy_gradient_loss | -0.00359    |
|    reward               | 0.012063252 |
|    value_loss           | 0.0287      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.761       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 524         |
|    time_elapsed         | 3210        |
|    total_timesteps      | 536576      |
| train/                  |             |
|    approx_kl            | 0.001706216 |
|    clip_fraction        | 0.0258      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.259      |
|    explained_variance   | 0.000335    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00163     |
|    n_updates            | 5230        |
|    policy_gradient_loss | -0.00243    |
|    reward               | 0.008246372 |
|    value_loss           | 0.0259      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.759         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 525           |
|    time_elapsed         | 3216          |
|    total_timesteps      | 537600        |
| train/                  |               |
|    approx_kl            | 0.00035607617 |
|    clip_fraction        | 0.0137        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.283        |
|    explained_variance   | 0.000455      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0106        |
|    n_updates            | 5240          |
|    policy_gradient_loss | 0.000475      |
|    reward               | 0.0           |
|    value_loss           | 0.0252        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 526          |
|    time_elapsed         | 3221         |
|    total_timesteps      | 538624       |
| train/                  |              |
|    approx_kl            | 0.0009107765 |
|    clip_fraction        | 0.0166       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.28        |
|    explained_variance   | 0.000673     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000754    |
|    n_updates            | 5250         |
|    policy_gradient_loss | -0.000766    |
|    reward               | -0.2795992   |
|    value_loss           | 0.0175       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.738         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 529           |
|    time_elapsed         | 3239          |
|    total_timesteps      | 541696        |
| train/                  |               |
|    approx_kl            | 0.00068678637 |
|    clip_fraction        | 0.0041        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.304        |
|    explained_variance   | 0.0022        |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00104       |
|    n_updates            | 5280          |
|    policy_gradient_loss | 0.000728      |
|    reward               | -0.00020002   |
|    value_loss           | 0.0236        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.727         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 531           |
|    time_elapsed         | 3251          |
|    total_timesteps      | 543744        |
| train/                  |               |
|    approx_kl            | 0.00014128635 |
|    clip_fraction        | 0.00879       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.324        |
|    explained_variance   | 3.18e-05      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000548      |
|    n_updates            | 5300          |
|    policy_gradient_loss | 0.00106       |
|    reward               | 0.0           |
|    value_loss           | 0.0204        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.734       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 532         |
|    time_elapsed         | 3256        |
|    total_timesteps      | 544768      |
| train/                  |             |
|    approx_kl            | 0.001398582 |
|    clip_fraction        | 0.0153      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.318      |
|    explained_variance   | 0.000214    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00193    |
|    n_updates            | 5310        |
|    policy_gradient_loss | -0.000132   |
|    reward               | 0.0         |
|    value_loss           | 0.0236      |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 0.732      |
| time/                   |            |
|    fps                  | 167        |
|    iterations           | 533        |
|    time_elapsed         | 3262       |
|    total_timesteps      | 545792     |
| train/                  |            |
|    approx_kl            | 0.00205565 |
|    clip_fraction        | 0.0167     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.295     |
|    explained_variance   | 0.000807   |
|    learning_rate        | 0.0002     |
|    loss                 | 0.00317    |
|    n_updates            | 5320       |
|    policy_gradient_loss | -0.00281   |
|    reward               | 0.0        |
|    value_loss           | 0.0233     |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.737        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 534          |
|    time_elapsed         | 3268         |
|    total_timesteps      | 546816       |
| train/                  |              |
|    approx_kl            | 0.0014435428 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.256       |
|    explained_variance   | 0.0021       |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00427     |
|    n_updates            | 5330         |
|    policy_gradient_loss | -0.00336     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0221       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.732        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 535          |
|    time_elapsed         | 3274         |
|    total_timesteps      | 547840       |
| train/                  |              |
|    approx_kl            | 0.0007404996 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.208       |
|    explained_variance   | 0.00044      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00252     |
|    n_updates            | 5340         |
|    policy_gradient_loss | -0.00255     |
|    reward               | 0.0152157    |
|    value_loss           | 0.0226       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.728       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 536         |
|    time_elapsed         | 3279        |
|    total_timesteps      | 548864      |
| train/                  |             |
|    approx_kl            | 0.001001989 |
|    clip_fraction        | 0.0215      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.246      |
|    explained_variance   | 0.0028      |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00682     |
|    n_updates            | 5350        |
|    policy_gradient_loss | -0.000315   |
|    reward               | 0.016630037 |
|    value_loss           | 0.0302      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.723        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 537          |
|    time_elapsed         | 3285         |
|    total_timesteps      | 549888       |
| train/                  |              |
|    approx_kl            | 0.0012918382 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 0.00125      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000441    |
|    n_updates            | 5360         |
|    policy_gradient_loss | -0.00297     |
|    reward               | 0.020806918  |
|    value_loss           | 0.0195       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.725        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 538          |
|    time_elapsed         | 3291         |
|    total_timesteps      | 550912       |
| train/                  |              |
|    approx_kl            | 0.0008552238 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.000112     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00517      |
|    n_updates            | 5370         |
|    policy_gradient_loss | -0.00272     |
|    reward               | 0.0          |
|    value_loss           | 0.0216       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.728        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 539          |
|    time_elapsed         | 3297         |
|    total_timesteps      | 551936       |
| train/                  |              |
|    approx_kl            | 0.0005524021 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.000709     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000815    |
|    n_updates            | 5380         |
|    policy_gradient_loss | -0.00229     |
|    reward               | 0.010244188  |
|    value_loss           | 0.0182       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 540          |
|    time_elapsed         | 3302         |
|    total_timesteps      | 552960       |
| train/                  |              |
|    approx_kl            | 0.0009875966 |
|    clip_fraction        | 0.0159       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.208       |
|    explained_variance   | 0.000578     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00425     |
|    n_updates            | 5390         |
|    policy_gradient_loss | 0.000157     |
|    reward               | -0.030240428 |
|    value_loss           | 0.0268       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.726        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 541          |
|    time_elapsed         | 3308         |
|    total_timesteps      | 553984       |
| train/                  |              |
|    approx_kl            | 0.0013984066 |
|    clip_fraction        | 0.0231       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.234       |
|    explained_variance   | 0.000516     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0158       |
|    n_updates            | 5400         |
|    policy_gradient_loss | -0.00117     |
|    reward               | 0.041691024  |
|    value_loss           | 0.0403       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.727         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 542           |
|    time_elapsed         | 3314          |
|    total_timesteps      | 555008        |
| train/                  |               |
|    approx_kl            | 0.00082290906 |
|    clip_fraction        | 0.0139        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.259        |
|    explained_variance   | 0.000606      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000515     |
|    n_updates            | 5410          |
|    policy_gradient_loss | 0.000679      |
|    reward               | 0.023675838   |
|    value_loss           | 0.028         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.726        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 543          |
|    time_elapsed         | 3320         |
|    total_timesteps      | 556032       |
| train/                  |              |
|    approx_kl            | 0.0011653176 |
|    clip_fraction        | 0.00518      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.265       |
|    explained_variance   | 0.00185      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000583     |
|    n_updates            | 5420         |
|    policy_gradient_loss | -0.0003      |
|    reward               | 0.019028705  |
|    value_loss           | 0.0188       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.721        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 544          |
|    time_elapsed         | 3326         |
|    total_timesteps      | 557056       |
| train/                  |              |
|    approx_kl            | 0.0007980472 |
|    clip_fraction        | 0.0107       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.245       |
|    explained_variance   | 0.00258      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00854      |
|    n_updates            | 5430         |
|    policy_gradient_loss | -0.00202     |
|    reward               | 0.0          |
|    value_loss           | 0.0183       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.717        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 545          |
|    time_elapsed         | 3332         |
|    total_timesteps      | 558080       |
| train/                  |              |
|    approx_kl            | 0.0015603846 |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.27        |
|    explained_variance   | 0.000473     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0251       |
|    n_updates            | 5440         |
|    policy_gradient_loss | 0.000598     |
|    reward               | 0.0          |
|    value_loss           | 0.0295       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.716         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 546           |
|    time_elapsed         | 3338          |
|    total_timesteps      | 559104        |
| train/                  |               |
|    approx_kl            | 0.00071828766 |
|    clip_fraction        | 0.0151        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.279        |
|    explained_variance   | -0.00109      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00258       |
|    n_updates            | 5450          |
|    policy_gradient_loss | -0.00127      |
|    reward               | 0.07296216    |
|    value_loss           | 0.0205        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.709         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 548           |
|    time_elapsed         | 3350          |
|    total_timesteps      | 561152        |
| train/                  |               |
|    approx_kl            | 0.00058506604 |
|    clip_fraction        | 0.00986       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.25         |
|    explained_variance   | 0.000766      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.026         |
|    n_updates            | 5470          |
|    policy_gradient_loss | 0.000876      |
|    reward               | 0.029488755   |
|    value_loss           | 0.0302        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.705        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 549          |
|    time_elapsed         | 3356         |
|    total_timesteps      | 562176       |
| train/                  |              |
|    approx_kl            | 0.0013301717 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.00126      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00829      |
|    n_updates            | 5480         |
|    policy_gradient_loss | -0.00314     |
|    reward               | 0.0          |
|    value_loss           | 0.0193       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.709        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 550          |
|    time_elapsed         | 3362         |
|    total_timesteps      | 563200       |
| train/                  |              |
|    approx_kl            | 0.0005610605 |
|    clip_fraction        | 0.0042       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.203       |
|    explained_variance   | -0.00115     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00328      |
|    n_updates            | 5490         |
|    policy_gradient_loss | -0.000835    |
|    reward               | 0.0          |
|    value_loss           | 0.0242       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 551           |
|    time_elapsed         | 3368          |
|    total_timesteps      | 564224        |
| train/                  |               |
|    approx_kl            | 0.00020393147 |
|    clip_fraction        | 0.0041        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.19         |
|    explained_variance   | 0.000137      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00595       |
|    n_updates            | 5500          |
|    policy_gradient_loss | 0.000661      |
|    reward               | -0.013452455  |
|    value_loss           | 0.0311        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.715        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 552          |
|    time_elapsed         | 3374         |
|    total_timesteps      | 565248       |
| train/                  |              |
|    approx_kl            | 0.0007604808 |
|    clip_fraction        | 0.0042       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.168       |
|    explained_variance   | 0.000926     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0063       |
|    n_updates            | 5510         |
|    policy_gradient_loss | -0.000821    |
|    reward               | -0.1351345   |
|    value_loss           | 0.0299       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 553           |
|    time_elapsed         | 3379          |
|    total_timesteps      | 566272        |
| train/                  |               |
|    approx_kl            | 0.00013066403 |
|    clip_fraction        | 0.00352       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.166        |
|    explained_variance   | 0.000651      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.004         |
|    n_updates            | 5520          |
|    policy_gradient_loss | 0.000256      |
|    reward               | 0.0075396285  |
|    value_loss           | 0.0296        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.707         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 554           |
|    time_elapsed         | 3385          |
|    total_timesteps      | 567296        |
| train/                  |               |
|    approx_kl            | 0.00046915957 |
|    clip_fraction        | 0.00313       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.186        |
|    explained_variance   | 0.000734      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0251        |
|    n_updates            | 5530          |
|    policy_gradient_loss | 0.00102       |
|    reward               | 0.06845144    |
|    value_loss           | 0.0262        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.705        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 555          |
|    time_elapsed         | 3391         |
|    total_timesteps      | 568320       |
| train/                  |              |
|    approx_kl            | 0.0007511665 |
|    clip_fraction        | 0.00293      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.179       |
|    explained_variance   | 0.000815     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00544      |
|    n_updates            | 5540         |
|    policy_gradient_loss | -0.00048     |
|    reward               | 0.049166735  |
|    value_loss           | 0.0213       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.713         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 556           |
|    time_elapsed         | 3397          |
|    total_timesteps      | 569344        |
| train/                  |               |
|    approx_kl            | 0.00029679295 |
|    clip_fraction        | 0.000879      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.159        |
|    explained_variance   | 0.000777      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00748       |
|    n_updates            | 5550          |
|    policy_gradient_loss | -0.00039      |
|    reward               | -0.0039198636 |
|    value_loss           | 0.0245        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.719         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 557           |
|    time_elapsed         | 3402          |
|    total_timesteps      | 570368        |
| train/                  |               |
|    approx_kl            | 0.00065665616 |
|    clip_fraction        | 0.0208        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.175        |
|    explained_variance   | 0.000122      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0101        |
|    n_updates            | 5560          |
|    policy_gradient_loss | -0.00294      |
|    reward               | -0.00880356   |
|    value_loss           | 0.034         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.706        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 558          |
|    time_elapsed         | 3408         |
|    total_timesteps      | 571392       |
| train/                  |              |
|    approx_kl            | 0.0026928533 |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.204       |
|    explained_variance   | -2.3e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00815      |
|    n_updates            | 5570         |
|    policy_gradient_loss | -0.000258    |
|    reward               | -0.04712342  |
|    value_loss           | 0.0302       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.707        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 560          |
|    time_elapsed         | 3420         |
|    total_timesteps      | 573440       |
| train/                  |              |
|    approx_kl            | 0.0004082237 |
|    clip_fraction        | 0.00566      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.228       |
|    explained_variance   | 9.08e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0122       |
|    n_updates            | 5590         |
|    policy_gradient_loss | -0.000354    |
|    reward               | 0.0          |
|    value_loss           | 0.0267       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.708        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 561          |
|    time_elapsed         | 3426         |
|    total_timesteps      | 574464       |
| train/                  |              |
|    approx_kl            | 0.0014109213 |
|    clip_fraction        | 0.0212       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | 0.000918     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0123      |
|    n_updates            | 5600         |
|    policy_gradient_loss | -0.00367     |
|    reward               | 0.003682656  |
|    value_loss           | 0.0183       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.704        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 562          |
|    time_elapsed         | 3432         |
|    total_timesteps      | 575488       |
| train/                  |              |
|    approx_kl            | 0.0005599338 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.202       |
|    explained_variance   | 0.000812     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0134       |
|    n_updates            | 5610         |
|    policy_gradient_loss | -0.00343     |
|    reward               | 0.0044796527 |
|    value_loss           | 0.0287       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.702        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 563          |
|    time_elapsed         | 3438         |
|    total_timesteps      | 576512       |
| train/                  |              |
|    approx_kl            | 0.0005932531 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.236       |
|    explained_variance   | -0.00034     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0149       |
|    n_updates            | 5620         |
|    policy_gradient_loss | -0.000527    |
|    reward               | 0.0024489777 |
|    value_loss           | 0.03         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.706        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 564          |
|    time_elapsed         | 3444         |
|    total_timesteps      | 577536       |
| train/                  |              |
|    approx_kl            | 0.0007047522 |
|    clip_fraction        | 0.0129       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | 0.000353     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000355     |
|    n_updates            | 5630         |
|    policy_gradient_loss | -0.00188     |
|    reward               | 0.008535589  |
|    value_loss           | 0.0234       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.71          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 565           |
|    time_elapsed         | 3450          |
|    total_timesteps      | 578560        |
| train/                  |               |
|    approx_kl            | 0.00036894227 |
|    clip_fraction        | 0.00664       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.235        |
|    explained_variance   | 0.000447      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0013        |
|    n_updates            | 5640          |
|    policy_gradient_loss | 0.0008        |
|    reward               | 0.18784492    |
|    value_loss           | 0.0279        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.716        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 566          |
|    time_elapsed         | 3456         |
|    total_timesteps      | 579584       |
| train/                  |              |
|    approx_kl            | 0.0005075164 |
|    clip_fraction        | 0.011        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.261       |
|    explained_variance   | 0.00067      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0222       |
|    n_updates            | 5650         |
|    policy_gradient_loss | 0.000533     |
|    reward               | 0.0          |
|    value_loss           | 0.0312       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.713        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 567          |
|    time_elapsed         | 3462         |
|    total_timesteps      | 580608       |
| train/                  |              |
|    approx_kl            | 0.0013718535 |
|    clip_fraction        | 0.0247       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.258       |
|    explained_variance   | -0.00165     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.018        |
|    n_updates            | 5660         |
|    policy_gradient_loss | -0.0028      |
|    reward               | 0.006557557  |
|    value_loss           | 0.0242       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.714        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 568          |
|    time_elapsed         | 3468         |
|    total_timesteps      | 581632       |
| train/                  |              |
|    approx_kl            | 0.0012104749 |
|    clip_fraction        | 0.0175       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.217       |
|    explained_variance   | 6.1e-05      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00692     |
|    n_updates            | 5670         |
|    policy_gradient_loss | -0.00357     |
|    reward               | -0.027536048 |
|    value_loss           | 0.0194       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 569          |
|    time_elapsed         | 3473         |
|    total_timesteps      | 582656       |
| train/                  |              |
|    approx_kl            | 0.000654828  |
|    clip_fraction        | 0.00889      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | 0.00114      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00114      |
|    n_updates            | 5680         |
|    policy_gradient_loss | -0.00043     |
|    reward               | 0.0013281382 |
|    value_loss           | 0.025        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.72         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 570          |
|    time_elapsed         | 3479         |
|    total_timesteps      | 583680       |
| train/                  |              |
|    approx_kl            | 0.000515147  |
|    clip_fraction        | 0.00605      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.222       |
|    explained_variance   | 0.000951     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00129      |
|    n_updates            | 5690         |
|    policy_gradient_loss | -0.00027     |
|    reward               | 0.0074905707 |
|    value_loss           | 0.0226       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 571          |
|    time_elapsed         | 3484         |
|    total_timesteps      | 584704       |
| train/                  |              |
|    approx_kl            | 0.0007026284 |
|    clip_fraction        | 0.0254       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | 0.000422     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00601      |
|    n_updates            | 5700         |
|    policy_gradient_loss | -0.00184     |
|    reward               | 0.0          |
|    value_loss           | 0.0236       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.719        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 572          |
|    time_elapsed         | 3490         |
|    total_timesteps      | 585728       |
| train/                  |              |
|    approx_kl            | 0.0010396603 |
|    clip_fraction        | 0.00986      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.00135      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0111       |
|    n_updates            | 5710         |
|    policy_gradient_loss | -0.000671    |
|    reward               | -0.024737727 |
|    value_loss           | 0.0289       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.718        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 573          |
|    time_elapsed         | 3496         |
|    total_timesteps      | 586752       |
| train/                  |              |
|    approx_kl            | 0.0006122928 |
|    clip_fraction        | 0.0041       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.000954     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.004       |
|    n_updates            | 5720         |
|    policy_gradient_loss | 0.000273     |
|    reward               | 0.0          |
|    value_loss           | 0.0214       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.715        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 574          |
|    time_elapsed         | 3502         |
|    total_timesteps      | 587776       |
| train/                  |              |
|    approx_kl            | 0.0007183158 |
|    clip_fraction        | 0.0185       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.000928     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0027       |
|    n_updates            | 5730         |
|    policy_gradient_loss | -0.00211     |
|    reward               | 0.027018055  |
|    value_loss           | 0.0265       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.721         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 575           |
|    time_elapsed         | 3507          |
|    total_timesteps      | 588800        |
| train/                  |               |
|    approx_kl            | 0.00082863076 |
|    clip_fraction        | 0.0105        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.203        |
|    explained_variance   | 0.00106       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00805       |
|    n_updates            | 5740          |
|    policy_gradient_loss | -0.00122      |
|    reward               | -0.008259039  |
|    value_loss           | 0.0203        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.725         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 576           |
|    time_elapsed         | 3513          |
|    total_timesteps      | 589824        |
| train/                  |               |
|    approx_kl            | 0.00081294007 |
|    clip_fraction        | 0.0115        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.18         |
|    explained_variance   | 0.000227      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00306       |
|    n_updates            | 5750          |
|    policy_gradient_loss | -0.00193      |
|    reward               | 0.023156084   |
|    value_loss           | 0.0203        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.73         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 577          |
|    time_elapsed         | 3519         |
|    total_timesteps      | 590848       |
| train/                  |              |
|    approx_kl            | 6.925734e-05 |
|    clip_fraction        | 0.00313      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.18        |
|    explained_variance   | 0.000392     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0117       |
|    n_updates            | 5760         |
|    policy_gradient_loss | 0.000455     |
|    reward               | 0.0          |
|    value_loss           | 0.0291       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.731        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 578          |
|    time_elapsed         | 3526         |
|    total_timesteps      | 591872       |
| train/                  |              |
|    approx_kl            | 0.0008813195 |
|    clip_fraction        | 0.0163       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.158       |
|    explained_variance   | 0.000675     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00197      |
|    n_updates            | 5770         |
|    policy_gradient_loss | -0.00406     |
|    reward               | 0.0          |
|    value_loss           | 0.0277       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.736         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 580           |
|    time_elapsed         | 3537          |
|    total_timesteps      | 593920        |
| train/                  |               |
|    approx_kl            | 0.00041001977 |
|    clip_fraction        | 0.00781       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.179        |
|    explained_variance   | 0.000959      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00272       |
|    n_updates            | 5790          |
|    policy_gradient_loss | 0.000196      |
|    reward               | 0.0           |
|    value_loss           | 0.0188        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.735        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 581          |
|    time_elapsed         | 3543         |
|    total_timesteps      | 594944       |
| train/                  |              |
|    approx_kl            | 0.0007236537 |
|    clip_fraction        | 0.0104       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.193       |
|    explained_variance   | 0.000315     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0043      |
|    n_updates            | 5800         |
|    policy_gradient_loss | 0.000275     |
|    reward               | 0.0          |
|    value_loss           | 0.037        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.742         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 582           |
|    time_elapsed         | 3549          |
|    total_timesteps      | 595968        |
| train/                  |               |
|    approx_kl            | 0.00078757893 |
|    clip_fraction        | 0.0126        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.181        |
|    explained_variance   | 0.000588      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00915       |
|    n_updates            | 5810          |
|    policy_gradient_loss | -0.00236      |
|    reward               | 0.01627596    |
|    value_loss           | 0.0209        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.745        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 583          |
|    time_elapsed         | 3555         |
|    total_timesteps      | 596992       |
| train/                  |              |
|    approx_kl            | 0.0003867608 |
|    clip_fraction        | 0.00889      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.186       |
|    explained_variance   | 0.000571     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00477      |
|    n_updates            | 5820         |
|    policy_gradient_loss | -0.000235    |
|    reward               | 0.044030596  |
|    value_loss           | 0.0243       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.746         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 584           |
|    time_elapsed         | 3561          |
|    total_timesteps      | 598016        |
| train/                  |               |
|    approx_kl            | 0.00063699926 |
|    clip_fraction        | 0.0167        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.189        |
|    explained_variance   | 0.000588      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00542       |
|    n_updates            | 5830          |
|    policy_gradient_loss | -0.002        |
|    reward               | 0.025829427   |
|    value_loss           | 0.0352        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.744        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 585          |
|    time_elapsed         | 3567         |
|    total_timesteps      | 599040       |
| train/                  |              |
|    approx_kl            | 0.0010913811 |
|    clip_fraction        | 0.0264       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.00107      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00549     |
|    n_updates            | 5840         |
|    policy_gradient_loss | -0.00265     |
|    reward               | -0.15674342  |
|    value_loss           | 0.0257       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.739         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 586           |
|    time_elapsed         | 3572          |
|    total_timesteps      | 600064        |
| train/                  |               |
|    approx_kl            | 0.00086270383 |
|    clip_fraction        | 0.0187        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.185        |
|    explained_variance   | 0.000577      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000491      |
|    n_updates            | 5850          |
|    policy_gradient_loss | -0.00292      |
|    reward               | 0.0           |
|    value_loss           | 0.0212        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.741        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 587          |
|    time_elapsed         | 3579         |
|    total_timesteps      | 601088       |
| train/                  |              |
|    approx_kl            | 0.0004729489 |
|    clip_fraction        | 0.00674      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.201       |
|    explained_variance   | 0.0001       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00147      |
|    n_updates            | 5860         |
|    policy_gradient_loss | 0.000281     |
|    reward               | 0.017204732  |
|    value_loss           | 0.026        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.736        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 588          |
|    time_elapsed         | 3585         |
|    total_timesteps      | 602112       |
| train/                  |              |
|    approx_kl            | 0.0003098254 |
|    clip_fraction        | 0.00459      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | 0.000563     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00592      |
|    n_updates            | 5870         |
|    policy_gradient_loss | 0.000679     |
|    reward               | 0.046796735  |
|    value_loss           | 0.0254       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.741         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 589           |
|    time_elapsed         | 3591          |
|    total_timesteps      | 603136        |
| train/                  |               |
|    approx_kl            | 0.00042044843 |
|    clip_fraction        | 0.0103        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.194        |
|    explained_variance   | 0.000759      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00208       |
|    n_updates            | 5880          |
|    policy_gradient_loss | -0.00212      |
|    reward               | 0.007002467   |
|    value_loss           | 0.0237        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.745        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 590          |
|    time_elapsed         | 3597         |
|    total_timesteps      | 604160       |
| train/                  |              |
|    approx_kl            | 7.033581e-05 |
|    clip_fraction        | 0.00479      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.197       |
|    explained_variance   | 0.000483     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00738      |
|    n_updates            | 5890         |
|    policy_gradient_loss | 0.000608     |
|    reward               | 0.0          |
|    value_loss           | 0.0251       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.739        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 591          |
|    time_elapsed         | 3603         |
|    total_timesteps      | 605184       |
| train/                  |              |
|    approx_kl            | 0.0006285041 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.21        |
|    explained_variance   | 0.000397     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0101       |
|    n_updates            | 5900         |
|    policy_gradient_loss | -0.000368    |
|    reward               | 0.009657974  |
|    value_loss           | 0.0337       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.74         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 592          |
|    time_elapsed         | 3609         |
|    total_timesteps      | 606208       |
| train/                  |              |
|    approx_kl            | 0.0004712612 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.000645     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00559      |
|    n_updates            | 5910         |
|    policy_gradient_loss | 0.000218     |
|    reward               | 0.00602061   |
|    value_loss           | 0.0313       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.748        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 593          |
|    time_elapsed         | 3615         |
|    total_timesteps      | 607232       |
| train/                  |              |
|    approx_kl            | 0.0010771657 |
|    clip_fraction        | 0.0108       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.219       |
|    explained_variance   | 0.000376     |
|    learning_rate        | 0.0002       |
|    loss                 | 5.26e-05     |
|    n_updates            | 5920         |
|    policy_gradient_loss | -0.00105     |
|    reward               | 0.006955964  |
|    value_loss           | 0.0247       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.744         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 594           |
|    time_elapsed         | 3621          |
|    total_timesteps      | 608256        |
| train/                  |               |
|    approx_kl            | 0.00030293572 |
|    clip_fraction        | 0.00586       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.201        |
|    explained_variance   | 0.00129       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00734       |
|    n_updates            | 5930          |
|    policy_gradient_loss | -0.00112      |
|    reward               | 0.005930007   |
|    value_loss           | 0.0287        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.748        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 595          |
|    time_elapsed         | 3627         |
|    total_timesteps      | 609280       |
| train/                  |              |
|    approx_kl            | 0.0032173032 |
|    clip_fraction        | 0.00713      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.222       |
|    explained_variance   | 0.000924     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00908      |
|    n_updates            | 5940         |
|    policy_gradient_loss | -0.000429    |
|    reward               | -0.010992582 |
|    value_loss           | 0.027        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.75          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 596           |
|    time_elapsed         | 3633          |
|    total_timesteps      | 610304        |
| train/                  |               |
|    approx_kl            | 0.00093655783 |
|    clip_fraction        | 0.0148        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.225        |
|    explained_variance   | 0.00139       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000269      |
|    n_updates            | 5950          |
|    policy_gradient_loss | -0.00284      |
|    reward               | 0.06262857    |
|    value_loss           | 0.0193        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.743        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 597          |
|    time_elapsed         | 3639         |
|    total_timesteps      | 611328       |
| train/                  |              |
|    approx_kl            | 0.0071573043 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.295       |
|    explained_variance   | 0.000485     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00373     |
|    n_updates            | 5960         |
|    policy_gradient_loss | -0.000364    |
|    reward               | 0.04171928   |
|    value_loss           | 0.028        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 599          |
|    time_elapsed         | 3651         |
|    total_timesteps      | 613376       |
| train/                  |              |
|    approx_kl            | 0.0009777017 |
|    clip_fraction        | 0.0237       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.319       |
|    explained_variance   | 0.00167      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00367     |
|    n_updates            | 5980         |
|    policy_gradient_loss | -0.00102     |
|    reward               | 0.01913764   |
|    value_loss           | 0.019        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 600          |
|    time_elapsed         | 3657         |
|    total_timesteps      | 614400       |
| train/                  |              |
|    approx_kl            | 0.0014903969 |
|    clip_fraction        | 0.0162       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.282       |
|    explained_variance   | 0.000571     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00442     |
|    n_updates            | 5990         |
|    policy_gradient_loss | -0.002       |
|    reward               | -0.010549838 |
|    value_loss           | 0.0187       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.737        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 601          |
|    time_elapsed         | 3662         |
|    total_timesteps      | 615424       |
| train/                  |              |
|    approx_kl            | 0.0016102121 |
|    clip_fraction        | 0.0297       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.284       |
|    explained_variance   | 0.000825     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0175      |
|    n_updates            | 6000         |
|    policy_gradient_loss | -0.00293     |
|    reward               | 0.0          |
|    value_loss           | 0.0158       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.741        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 602          |
|    time_elapsed         | 3668         |
|    total_timesteps      | 616448       |
| train/                  |              |
|    approx_kl            | 0.0015264694 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.00113      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00245     |
|    n_updates            | 6010         |
|    policy_gradient_loss | -0.00335     |
|    reward               | 0.0          |
|    value_loss           | 0.0211       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.747        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 603          |
|    time_elapsed         | 3674         |
|    total_timesteps      | 617472       |
| train/                  |              |
|    approx_kl            | 0.0008655201 |
|    clip_fraction        | 0.00693      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.000735     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00154      |
|    n_updates            | 6020         |
|    policy_gradient_loss | -0.000953    |
|    reward               | 0.023974707  |
|    value_loss           | 0.0207       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.747       |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 605         |
|    time_elapsed         | 3686        |
|    total_timesteps      | 619520      |
| train/                  |             |
|    approx_kl            | 0.016454503 |
|    clip_fraction        | 0.0125      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.271      |
|    explained_variance   | 0.000461    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.000805    |
|    n_updates            | 6040        |
|    policy_gradient_loss | -0.000652   |
|    reward               | 0.0         |
|    value_loss           | 0.0277      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.735        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 606          |
|    time_elapsed         | 3692         |
|    total_timesteps      | 620544       |
| train/                  |              |
|    approx_kl            | 0.0012031879 |
|    clip_fraction        | 0.0148       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.369       |
|    explained_variance   | 0.000578     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00429     |
|    n_updates            | 6050         |
|    policy_gradient_loss | -4.97e-05    |
|    reward               | -0.04121569  |
|    value_loss           | 0.0192       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 607          |
|    time_elapsed         | 3697         |
|    total_timesteps      | 621568       |
| train/                  |              |
|    approx_kl            | 0.0017256517 |
|    clip_fraction        | 0.0251       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.323       |
|    explained_variance   | 0.0014       |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00769     |
|    n_updates            | 6060         |
|    policy_gradient_loss | -0.00436     |
|    reward               | 0.036764782  |
|    value_loss           | 0.0191       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 608          |
|    time_elapsed         | 3703         |
|    total_timesteps      | 622592       |
| train/                  |              |
|    approx_kl            | 0.0016081622 |
|    clip_fraction        | 0.0277       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.269       |
|    explained_variance   | 0.000414     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00269     |
|    n_updates            | 6070         |
|    policy_gradient_loss | -0.00474     |
|    reward               | 0.0          |
|    value_loss           | 0.0169       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 609          |
|    time_elapsed         | 3709         |
|    total_timesteps      | 623616       |
| train/                  |              |
|    approx_kl            | 0.0016139755 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.243       |
|    explained_variance   | -0.000107    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00492     |
|    n_updates            | 6080         |
|    policy_gradient_loss | -0.00282     |
|    reward               | -0.04101565  |
|    value_loss           | 0.0226       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.747        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 610          |
|    time_elapsed         | 3714         |
|    total_timesteps      | 624640       |
| train/                  |              |
|    approx_kl            | 0.0004345943 |
|    clip_fraction        | 0.0105       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.274       |
|    explained_variance   | 0.000288     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00702      |
|    n_updates            | 6090         |
|    policy_gradient_loss | 0.00101      |
|    reward               | 0.01869423   |
|    value_loss           | 0.0262       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 611          |
|    time_elapsed         | 3719         |
|    total_timesteps      | 625664       |
| train/                  |              |
|    approx_kl            | 0.0013421    |
|    clip_fraction        | 0.00674      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.265       |
|    explained_variance   | 0.00117      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000656     |
|    n_updates            | 6100         |
|    policy_gradient_loss | -0.000816    |
|    reward               | -0.030849952 |
|    value_loss           | 0.0166       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.743         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 612           |
|    time_elapsed         | 3725          |
|    total_timesteps      | 626688        |
| train/                  |               |
|    approx_kl            | 0.00090906303 |
|    clip_fraction        | 0.0113        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.222        |
|    explained_variance   | 0.00237       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00673       |
|    n_updates            | 6110          |
|    policy_gradient_loss | -0.00156      |
|    reward               | 0.0           |
|    value_loss           | 0.0177        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.743         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 613           |
|    time_elapsed         | 3731          |
|    total_timesteps      | 627712        |
| train/                  |               |
|    approx_kl            | 0.00034255337 |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.238        |
|    explained_variance   | 0.000784      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00855       |
|    n_updates            | 6120          |
|    policy_gradient_loss | 0.000631      |
|    reward               | 0.01578481    |
|    value_loss           | 0.0227        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.749         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 614           |
|    time_elapsed         | 3737          |
|    total_timesteps      | 628736        |
| train/                  |               |
|    approx_kl            | 0.00051908795 |
|    clip_fraction        | 0.00576       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.211        |
|    explained_variance   | 0.000665      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0131        |
|    n_updates            | 6130          |
|    policy_gradient_loss | -0.00172      |
|    reward               | 0.049321674   |
|    value_loss           | 0.0322        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.743        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 615          |
|    time_elapsed         | 3743         |
|    total_timesteps      | 629760       |
| train/                  |              |
|    approx_kl            | 0.0008344225 |
|    clip_fraction        | 0.0136       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.18        |
|    explained_variance   | -0.000504    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00369      |
|    n_updates            | 6140         |
|    policy_gradient_loss | -0.0022      |
|    reward               | 0.0          |
|    value_loss           | 0.0269       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.753         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 616           |
|    time_elapsed         | 3749          |
|    total_timesteps      | 630784        |
| train/                  |               |
|    approx_kl            | 0.00045860163 |
|    clip_fraction        | 0.00645       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.196        |
|    explained_variance   | 0.000678      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00455       |
|    n_updates            | 6150          |
|    policy_gradient_loss | 0.000489      |
|    reward               | -0.0011075303 |
|    value_loss           | 0.0226        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.767        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 618          |
|    time_elapsed         | 3761         |
|    total_timesteps      | 632832       |
| train/                  |              |
|    approx_kl            | 0.0010610741 |
|    clip_fraction        | 0.00967      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.000518     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00457      |
|    n_updates            | 6170         |
|    policy_gradient_loss | -0.0017      |
|    reward               | 0.0          |
|    value_loss           | 0.0254       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.774         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 619           |
|    time_elapsed         | 3766          |
|    total_timesteps      | 633856        |
| train/                  |               |
|    approx_kl            | 0.00088539056 |
|    clip_fraction        | 0.0201        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.25         |
|    explained_variance   | 0.000313      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00467       |
|    n_updates            | 6180          |
|    policy_gradient_loss | -0.000283     |
|    reward               | 0.017424395   |
|    value_loss           | 0.0246        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.778        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 620          |
|    time_elapsed         | 3772         |
|    total_timesteps      | 634880       |
| train/                  |              |
|    approx_kl            | 0.0004003481 |
|    clip_fraction        | 0.0105       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.258       |
|    explained_variance   | 0.000511     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00264     |
|    n_updates            | 6190         |
|    policy_gradient_loss | 5.37e-05     |
|    reward               | 0.0009722606 |
|    value_loss           | 0.0225       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.774        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 621          |
|    time_elapsed         | 3777         |
|    total_timesteps      | 635904       |
| train/                  |              |
|    approx_kl            | 0.0017914588 |
|    clip_fraction        | 0.0211       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.00239      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0132      |
|    n_updates            | 6200         |
|    policy_gradient_loss | -0.00311     |
|    reward               | 0.013630248  |
|    value_loss           | 0.0183       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 622          |
|    time_elapsed         | 3784         |
|    total_timesteps      | 636928       |
| train/                  |              |
|    approx_kl            | 0.0011085549 |
|    clip_fraction        | 0.00791      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.196       |
|    explained_variance   | 0.000327     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0188       |
|    n_updates            | 6210         |
|    policy_gradient_loss | -0.00129     |
|    reward               | 0.045604408  |
|    value_loss           | 0.0233       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.773         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 624           |
|    time_elapsed         | 3796          |
|    total_timesteps      | 638976        |
| train/                  |               |
|    approx_kl            | 0.00063097314 |
|    clip_fraction        | 0.0264        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.231        |
|    explained_variance   | -0.000432     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00328       |
|    n_updates            | 6230          |
|    policy_gradient_loss | -0.00152      |
|    reward               | 0.0           |
|    value_loss           | 0.0298        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.775        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 625          |
|    time_elapsed         | 3802         |
|    total_timesteps      | 640000       |
| train/                  |              |
|    approx_kl            | 0.0010196115 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.257       |
|    explained_variance   | 0.000127     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0045       |
|    n_updates            | 6240         |
|    policy_gradient_loss | 0.00106      |
|    reward               | 0.0          |
|    value_loss           | 0.0315       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.771         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 626           |
|    time_elapsed         | 3808          |
|    total_timesteps      | 641024        |
| train/                  |               |
|    approx_kl            | 0.00031598337 |
|    clip_fraction        | 0.0144        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.263        |
|    explained_variance   | 0.0007        |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0196        |
|    n_updates            | 6250          |
|    policy_gradient_loss | -0.000719     |
|    reward               | 0.03786948    |
|    value_loss           | 0.0383        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.776        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 627          |
|    time_elapsed         | 3814         |
|    total_timesteps      | 642048       |
| train/                  |              |
|    approx_kl            | 0.0004486168 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.258       |
|    explained_variance   | 0.000818     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00228     |
|    n_updates            | 6260         |
|    policy_gradient_loss | -0.000816    |
|    reward               | 0.008591785  |
|    value_loss           | 0.0202       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.79         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 628          |
|    time_elapsed         | 3820         |
|    total_timesteps      | 643072       |
| train/                  |              |
|    approx_kl            | 0.0013951329 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | 0.000793     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0179      |
|    n_updates            | 6270         |
|    policy_gradient_loss | -0.00231     |
|    reward               | 0.0          |
|    value_loss           | 0.0233       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.793        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 629          |
|    time_elapsed         | 3826         |
|    total_timesteps      | 644096       |
| train/                  |              |
|    approx_kl            | 0.0006061331 |
|    clip_fraction        | 0.00664      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.000889     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00605      |
|    n_updates            | 6280         |
|    policy_gradient_loss | -0.000545    |
|    reward               | 0.0023676087 |
|    value_loss           | 0.027        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 630          |
|    time_elapsed         | 3831         |
|    total_timesteps      | 645120       |
| train/                  |              |
|    approx_kl            | 0.0010420533 |
|    clip_fraction        | 0.0106       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.000916     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00574      |
|    n_updates            | 6290         |
|    policy_gradient_loss | -0.0023      |
|    reward               | 0.026629332  |
|    value_loss           | 0.0218       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.797         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 631           |
|    time_elapsed         | 3837          |
|    total_timesteps      | 646144        |
| train/                  |               |
|    approx_kl            | 0.00030831085 |
|    clip_fraction        | 0.004         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.188        |
|    explained_variance   | 0.000644      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00907       |
|    n_updates            | 6300          |
|    policy_gradient_loss | 0.000651      |
|    reward               | 0.001875591   |
|    value_loss           | 0.0261        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 632          |
|    time_elapsed         | 3843         |
|    total_timesteps      | 647168       |
| train/                  |              |
|    approx_kl            | 0.0011752844 |
|    clip_fraction        | 0.0169       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.215       |
|    explained_variance   | 0.000611     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00235      |
|    n_updates            | 6310         |
|    policy_gradient_loss | -0.000578    |
|    reward               | 0.09019041   |
|    value_loss           | 0.0277       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.794         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 633           |
|    time_elapsed         | 3850          |
|    total_timesteps      | 648192        |
| train/                  |               |
|    approx_kl            | 0.00085413625 |
|    clip_fraction        | 0.0191        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.203        |
|    explained_variance   | 0.000701      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00935       |
|    n_updates            | 6320          |
|    policy_gradient_loss | -0.00167      |
|    reward               | 0.012657089   |
|    value_loss           | 0.0285        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.798        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 634          |
|    time_elapsed         | 3857         |
|    total_timesteps      | 649216       |
| train/                  |              |
|    approx_kl            | 0.0014478981 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.177       |
|    explained_variance   | 0.000372     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00452      |
|    n_updates            | 6330         |
|    policy_gradient_loss | -0.00232     |
|    reward               | 0.0          |
|    value_loss           | 0.0208       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.807         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 635           |
|    time_elapsed         | 3864          |
|    total_timesteps      | 650240        |
| train/                  |               |
|    approx_kl            | 0.00044767786 |
|    clip_fraction        | 0.00342       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.181        |
|    explained_variance   | 0.000232      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00578       |
|    n_updates            | 6340          |
|    policy_gradient_loss | 0.000363      |
|    reward               | 0.071145214   |
|    value_loss           | 0.0313        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.806         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 636           |
|    time_elapsed         | 3870          |
|    total_timesteps      | 651264        |
| train/                  |               |
|    approx_kl            | 0.00057187676 |
|    clip_fraction        | 0.0151        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.188        |
|    explained_variance   | 0.000739      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00982       |
|    n_updates            | 6350          |
|    policy_gradient_loss | -0.000452     |
|    reward               | 0.004028678   |
|    value_loss           | 0.0279        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.802         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 637           |
|    time_elapsed         | 3877          |
|    total_timesteps      | 652288        |
| train/                  |               |
|    approx_kl            | 0.00063391787 |
|    clip_fraction        | 0.00879       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.181        |
|    explained_variance   | 0.000585      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0182        |
|    n_updates            | 6360          |
|    policy_gradient_loss | -0.000556     |
|    reward               | 0.045488037   |
|    value_loss           | 0.0324        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.8          |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 638          |
|    time_elapsed         | 3883         |
|    total_timesteps      | 653312       |
| train/                  |              |
|    approx_kl            | 0.0015037479 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.202       |
|    explained_variance   | 0.00104      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0125       |
|    n_updates            | 6370         |
|    policy_gradient_loss | -0.000105    |
|    reward               | 0.0          |
|    value_loss           | 0.0341       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.812         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 639           |
|    time_elapsed         | 3890          |
|    total_timesteps      | 654336        |
| train/                  |               |
|    approx_kl            | 0.00086677686 |
|    clip_fraction        | 0.0196        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.207        |
|    explained_variance   | 0.000859      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00474       |
|    n_updates            | 6380          |
|    policy_gradient_loss | -0.00209      |
|    reward               | 0.0           |
|    value_loss           | 0.031         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.806        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 640          |
|    time_elapsed         | 3896         |
|    total_timesteps      | 655360       |
| train/                  |              |
|    approx_kl            | 7.565337e-05 |
|    clip_fraction        | 0.00352      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.207       |
|    explained_variance   | 0.000546     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00769      |
|    n_updates            | 6390         |
|    policy_gradient_loss | 0.000487     |
|    reward               | 0.005433808  |
|    value_loss           | 0.0256       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.806         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 641           |
|    time_elapsed         | 3902          |
|    total_timesteps      | 656384        |
| train/                  |               |
|    approx_kl            | 0.00048452202 |
|    clip_fraction        | 0.0181        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.235        |
|    explained_variance   | 0.00204       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0137        |
|    n_updates            | 6400          |
|    policy_gradient_loss | 0.000208      |
|    reward               | 0.015889501   |
|    value_loss           | 0.0352        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.819         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 642           |
|    time_elapsed         | 3909          |
|    total_timesteps      | 657408        |
| train/                  |               |
|    approx_kl            | 0.00090755767 |
|    clip_fraction        | 0.0112        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.231        |
|    explained_variance   | 0.00283       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00692      |
|    n_updates            | 6410          |
|    policy_gradient_loss | -0.000515     |
|    reward               | 0.027541436   |
|    value_loss           | 0.0198        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.822        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 643          |
|    time_elapsed         | 3915         |
|    total_timesteps      | 658432       |
| train/                  |              |
|    approx_kl            | 0.0009866699 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.197       |
|    explained_variance   | 0.000674     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00628      |
|    n_updates            | 6420         |
|    policy_gradient_loss | -0.00222     |
|    reward               | 0.0          |
|    value_loss           | 0.0207       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.822         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 644           |
|    time_elapsed         | 3921          |
|    total_timesteps      | 659456        |
| train/                  |               |
|    approx_kl            | 0.00042324176 |
|    clip_fraction        | 0.00898       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | 0.000296      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00527       |
|    n_updates            | 6430          |
|    policy_gradient_loss | 0.000708      |
|    reward               | 0.04246938    |
|    value_loss           | 0.0345        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.835         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 645           |
|    time_elapsed         | 3927          |
|    total_timesteps      | 660480        |
| train/                  |               |
|    approx_kl            | 0.00029485562 |
|    clip_fraction        | 0.00586       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.195        |
|    explained_variance   | 0.000891      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0017        |
|    n_updates            | 6440          |
|    policy_gradient_loss | -0.000702     |
|    reward               | -0.00020002   |
|    value_loss           | 0.0205        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.838         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 646           |
|    time_elapsed         | 3934          |
|    total_timesteps      | 661504        |
| train/                  |               |
|    approx_kl            | 0.00050608645 |
|    clip_fraction        | 0.0119        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.214        |
|    explained_variance   | 0.00336       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00187       |
|    n_updates            | 6450          |
|    policy_gradient_loss | 0.000341      |
|    reward               | 0.0           |
|    value_loss           | 0.0232        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 647          |
|    time_elapsed         | 3941         |
|    total_timesteps      | 662528       |
| train/                  |              |
|    approx_kl            | 0.0006511779 |
|    clip_fraction        | 0.00342      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.0019       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0026       |
|    n_updates            | 6460         |
|    policy_gradient_loss | 0.00054      |
|    reward               | 0.042113926  |
|    value_loss           | 0.025        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 648          |
|    time_elapsed         | 3947         |
|    total_timesteps      | 663552       |
| train/                  |              |
|    approx_kl            | 0.0006697802 |
|    clip_fraction        | 0.015        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.244       |
|    explained_variance   | 0.00075      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00348      |
|    n_updates            | 6470         |
|    policy_gradient_loss | 0.000714     |
|    reward               | 0.010603727  |
|    value_loss           | 0.0305       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.839         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 649           |
|    time_elapsed         | 3953          |
|    total_timesteps      | 664576        |
| train/                  |               |
|    approx_kl            | 0.00015427801 |
|    clip_fraction        | 0.00459       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.26         |
|    explained_variance   | 0.000984      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0264        |
|    n_updates            | 6480          |
|    policy_gradient_loss | 0.0011        |
|    reward               | 0.026912073   |
|    value_loss           | 0.0257        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 650          |
|    time_elapsed         | 3959         |
|    total_timesteps      | 665600       |
| train/                  |              |
|    approx_kl            | 0.0009997482 |
|    clip_fraction        | 0.0225       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.00139      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00146      |
|    n_updates            | 6490         |
|    policy_gradient_loss | -0.0018      |
|    reward               | -0.02260172  |
|    value_loss           | 0.0206       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 651          |
|    time_elapsed         | 3965         |
|    total_timesteps      | 666624       |
| train/                  |              |
|    approx_kl            | 0.0008091085 |
|    clip_fraction        | 0.0082       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | -6.06e-05    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00971      |
|    n_updates            | 6500         |
|    policy_gradient_loss | -0.000235    |
|    reward               | -0.020131355 |
|    value_loss           | 0.0285       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 652          |
|    time_elapsed         | 3971         |
|    total_timesteps      | 667648       |
| train/                  |              |
|    approx_kl            | 0.0010530913 |
|    clip_fraction        | 0.00947      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | 0.000787     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00228     |
|    n_updates            | 6510         |
|    policy_gradient_loss | -0.000103    |
|    reward               | 0.0          |
|    value_loss           | 0.0197       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.835         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 653           |
|    time_elapsed         | 3977          |
|    total_timesteps      | 668672        |
| train/                  |               |
|    approx_kl            | 0.00096092327 |
|    clip_fraction        | 0.0167        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.26         |
|    explained_variance   | 0.000321      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00501       |
|    n_updates            | 6520          |
|    policy_gradient_loss | 0.000468      |
|    reward               | 0.0           |
|    value_loss           | 0.0237        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 655          |
|    time_elapsed         | 3989         |
|    total_timesteps      | 670720       |
| train/                  |              |
|    approx_kl            | 0.0010827088 |
|    clip_fraction        | 0.00732      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.247       |
|    explained_variance   | 0.00134      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000369    |
|    n_updates            | 6540         |
|    policy_gradient_loss | -0.000828    |
|    reward               | 0.0          |
|    value_loss           | 0.0219       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 656          |
|    time_elapsed         | 3995         |
|    total_timesteps      | 671744       |
| train/                  |              |
|    approx_kl            | 0.0008137072 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.228       |
|    explained_variance   | 0.000804     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0055       |
|    n_updates            | 6550         |
|    policy_gradient_loss | -0.00211     |
|    reward               | 0.0          |
|    value_loss           | 0.0177       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.84        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 657         |
|    time_elapsed         | 4001        |
|    total_timesteps      | 672768      |
| train/                  |             |
|    approx_kl            | 0.001096626 |
|    clip_fraction        | 0.0211      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.234      |
|    explained_variance   | 0.000697    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00191     |
|    n_updates            | 6560        |
|    policy_gradient_loss | -0.00145    |
|    reward               | 0.0         |
|    value_loss           | 0.0321      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 658          |
|    time_elapsed         | 4007         |
|    total_timesteps      | 673792       |
| train/                  |              |
|    approx_kl            | 0.0002631012 |
|    clip_fraction        | 0.00381      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.212       |
|    explained_variance   | -0.000437    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.002        |
|    n_updates            | 6570         |
|    policy_gradient_loss | -0.000294    |
|    reward               | 0.0          |
|    value_loss           | 0.022        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.832         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 659           |
|    time_elapsed         | 4013          |
|    total_timesteps      | 674816        |
| train/                  |               |
|    approx_kl            | 0.00071705005 |
|    clip_fraction        | 0.0147        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.236        |
|    explained_variance   | 0.000607      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0179        |
|    n_updates            | 6580          |
|    policy_gradient_loss | 0.000196      |
|    reward               | 0.014044813   |
|    value_loss           | 0.0278        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 660          |
|    time_elapsed         | 4020         |
|    total_timesteps      | 675840       |
| train/                  |              |
|    approx_kl            | 0.0013269454 |
|    clip_fraction        | 0.018        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.191       |
|    explained_variance   | 0.000503     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000827     |
|    n_updates            | 6590         |
|    policy_gradient_loss | -0.00278     |
|    reward               | 0.029026186  |
|    value_loss           | 0.0198       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.836         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 661           |
|    time_elapsed         | 4027          |
|    total_timesteps      | 676864        |
| train/                  |               |
|    approx_kl            | 0.00062162994 |
|    clip_fraction        | 0.00537       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.206        |
|    explained_variance   | 0.000802      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0141        |
|    n_updates            | 6600          |
|    policy_gradient_loss | 0.000533      |
|    reward               | 0.06642783    |
|    value_loss           | 0.0205        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.835       |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 662         |
|    time_elapsed         | 4033        |
|    total_timesteps      | 677888      |
| train/                  |             |
|    approx_kl            | 0.001004585 |
|    clip_fraction        | 0.0231      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.253      |
|    explained_variance   | 0.000632    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00906     |
|    n_updates            | 6610        |
|    policy_gradient_loss | -0.0002     |
|    reward               | 0.03208857  |
|    value_loss           | 0.0254      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.836         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 663           |
|    time_elapsed         | 4039          |
|    total_timesteps      | 678912        |
| train/                  |               |
|    approx_kl            | 0.00092701305 |
|    clip_fraction        | 0.0105        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.25         |
|    explained_variance   | 0.000659      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00116       |
|    n_updates            | 6620          |
|    policy_gradient_loss | -0.000793     |
|    reward               | 0.0           |
|    value_loss           | 0.0228        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 664          |
|    time_elapsed         | 4046         |
|    total_timesteps      | 679936       |
| train/                  |              |
|    approx_kl            | 0.0013678164 |
|    clip_fraction        | 0.0218       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.183       |
|    explained_variance   | 0.00118      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00376      |
|    n_updates            | 6630         |
|    policy_gradient_loss | -0.00398     |
|    reward               | 0.01650095   |
|    value_loss           | 0.0207       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.85        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 665         |
|    time_elapsed         | 4053        |
|    total_timesteps      | 680960      |
| train/                  |             |
|    approx_kl            | 0.000476446 |
|    clip_fraction        | 0.0136      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.202      |
|    explained_variance   | 0.00035     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0235      |
|    n_updates            | 6640        |
|    policy_gradient_loss | 0.000216    |
|    reward               | 0.032715864 |
|    value_loss           | 0.0226      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.851         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 666           |
|    time_elapsed         | 4060          |
|    total_timesteps      | 681984        |
| train/                  |               |
|    approx_kl            | 0.00071509404 |
|    clip_fraction        | 0.0151        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.207        |
|    explained_variance   | 0.000708      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00465      |
|    n_updates            | 6650          |
|    policy_gradient_loss | -0.000902     |
|    reward               | 0.0           |
|    value_loss           | 0.0269        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.854        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 667          |
|    time_elapsed         | 4066         |
|    total_timesteps      | 683008       |
| train/                  |              |
|    approx_kl            | 0.0007096477 |
|    clip_fraction        | 0.00645      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.195       |
|    explained_variance   | 0.000737     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0163       |
|    n_updates            | 6660         |
|    policy_gradient_loss | -0.000363    |
|    reward               | 0.004454127  |
|    value_loss           | 0.0255       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 668          |
|    time_elapsed         | 4073         |
|    total_timesteps      | 684032       |
| train/                  |              |
|    approx_kl            | 0.0007687999 |
|    clip_fraction        | 0.0209       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.000673     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0033      |
|    n_updates            | 6670         |
|    policy_gradient_loss | -0.00059     |
|    reward               | 0.0          |
|    value_loss           | 0.0318       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 669          |
|    time_elapsed         | 4079         |
|    total_timesteps      | 685056       |
| train/                  |              |
|    approx_kl            | 0.0007469946 |
|    clip_fraction        | 0.0101       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.251       |
|    explained_variance   | 0.00228      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00287      |
|    n_updates            | 6680         |
|    policy_gradient_loss | 0.000178     |
|    reward               | 0.02438125   |
|    value_loss           | 0.0266       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.856        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 670          |
|    time_elapsed         | 4085         |
|    total_timesteps      | 686080       |
| train/                  |              |
|    approx_kl            | 0.0010856341 |
|    clip_fraction        | 0.0194       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.248       |
|    explained_variance   | 0.00144      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00702      |
|    n_updates            | 6690         |
|    policy_gradient_loss | -0.00192     |
|    reward               | 0.013795561  |
|    value_loss           | 0.0177       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 671          |
|    time_elapsed         | 4092         |
|    total_timesteps      | 687104       |
| train/                  |              |
|    approx_kl            | 0.0011731328 |
|    clip_fraction        | 0.0273       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.00178      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00535      |
|    n_updates            | 6700         |
|    policy_gradient_loss | -0.00417     |
|    reward               | 0.0023548482 |
|    value_loss           | 0.0268       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.835         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 672           |
|    time_elapsed         | 4098          |
|    total_timesteps      | 688128        |
| train/                  |               |
|    approx_kl            | 0.00091647886 |
|    clip_fraction        | 0.0084        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.228        |
|    explained_variance   | 0.00223       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0062        |
|    n_updates            | 6710          |
|    policy_gradient_loss | -0.000914     |
|    reward               | 0.0           |
|    value_loss           | 0.029         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.827         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 673           |
|    time_elapsed         | 4104          |
|    total_timesteps      | 689152        |
| train/                  |               |
|    approx_kl            | 0.00097373116 |
|    clip_fraction        | 0.0129        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.181        |
|    explained_variance   | 0.00105       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00566      |
|    n_updates            | 6720          |
|    policy_gradient_loss | -0.00229      |
|    reward               | 0.0059505273  |
|    value_loss           | 0.0249        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.824        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 674          |
|    time_elapsed         | 4110         |
|    total_timesteps      | 690176       |
| train/                  |              |
|    approx_kl            | 0.0005942988 |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.179       |
|    explained_variance   | -0.000146    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0158       |
|    n_updates            | 6730         |
|    policy_gradient_loss | -0.000287    |
|    reward               | 0.0          |
|    value_loss           | 0.0296       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.829         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 676           |
|    time_elapsed         | 4121          |
|    total_timesteps      | 692224        |
| train/                  |               |
|    approx_kl            | 0.00093003747 |
|    clip_fraction        | 0.00771       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.167        |
|    explained_variance   | 0.000934      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00307       |
|    n_updates            | 6750          |
|    policy_gradient_loss | -0.000873     |
|    reward               | -0.004178378  |
|    value_loss           | 0.0207        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.831         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 677           |
|    time_elapsed         | 4127          |
|    total_timesteps      | 693248        |
| train/                  |               |
|    approx_kl            | 0.00033789407 |
|    clip_fraction        | 0.0131        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.185        |
|    explained_variance   | 0.000439      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0124        |
|    n_updates            | 6760          |
|    policy_gradient_loss | 0.000397      |
|    reward               | -0.66710037   |
|    value_loss           | 0.0297        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.829         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 678           |
|    time_elapsed         | 4133          |
|    total_timesteps      | 694272        |
| train/                  |               |
|    approx_kl            | 0.00028284075 |
|    clip_fraction        | 0.0115        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.2          |
|    explained_variance   | 0.000493      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0199        |
|    n_updates            | 6770          |
|    policy_gradient_loss | -6.13e-06     |
|    reward               | -0.0015409808 |
|    value_loss           | 0.0402        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.828         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 679           |
|    time_elapsed         | 4139          |
|    total_timesteps      | 695296        |
| train/                  |               |
|    approx_kl            | 0.0008211161  |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.187        |
|    explained_variance   | 0.000464      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00486       |
|    n_updates            | 6780          |
|    policy_gradient_loss | -0.000559     |
|    reward               | -0.0033733794 |
|    value_loss           | 0.0268        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.82        |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 680         |
|    time_elapsed         | 4145        |
|    total_timesteps      | 696320      |
| train/                  |             |
|    approx_kl            | 0.001037145 |
|    clip_fraction        | 0.0116      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.186      |
|    explained_variance   | 0.000459    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00989    |
|    n_updates            | 6790        |
|    policy_gradient_loss | -0.000931   |
|    reward               | 0.0         |
|    value_loss           | 0.0171      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.818        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 681          |
|    time_elapsed         | 4151         |
|    total_timesteps      | 697344       |
| train/                  |              |
|    approx_kl            | 0.0006415931 |
|    clip_fraction        | 0.00693      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.000837     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0059       |
|    n_updates            | 6800         |
|    policy_gradient_loss | -0.000488    |
|    reward               | 0.0          |
|    value_loss           | 0.0245       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.824        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 682          |
|    time_elapsed         | 4157         |
|    total_timesteps      | 698368       |
| train/                  |              |
|    approx_kl            | 0.0005831953 |
|    clip_fraction        | 0.0103       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.205       |
|    explained_variance   | 0.00245      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0118       |
|    n_updates            | 6810         |
|    policy_gradient_loss | -0.000371    |
|    reward               | 0.004621075  |
|    value_loss           | 0.0281       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.826         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 683           |
|    time_elapsed         | 4163          |
|    total_timesteps      | 699392        |
| train/                  |               |
|    approx_kl            | 0.00054792815 |
|    clip_fraction        | 0.00732       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.176        |
|    explained_variance   | 0.000406      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00275       |
|    n_updates            | 6820          |
|    policy_gradient_loss | -0.00207      |
|    reward               | 0.0           |
|    value_loss           | 0.0238        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.828        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 684          |
|    time_elapsed         | 4170         |
|    total_timesteps      | 700416       |
| train/                  |              |
|    approx_kl            | 0.0006646397 |
|    clip_fraction        | 0.0132       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.194       |
|    explained_variance   | 0.000566     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00582      |
|    n_updates            | 6830         |
|    policy_gradient_loss | 0.0002       |
|    reward               | 0.0          |
|    value_loss           | 0.0299       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.836         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 685           |
|    time_elapsed         | 4176          |
|    total_timesteps      | 701440        |
| train/                  |               |
|    approx_kl            | 0.00055949634 |
|    clip_fraction        | 0.0116        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.201        |
|    explained_variance   | 0.000748      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00294      |
|    n_updates            | 6840          |
|    policy_gradient_loss | -0.00119      |
|    reward               | -0.016767323  |
|    value_loss           | 0.0274        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 686          |
|    time_elapsed         | 4182         |
|    total_timesteps      | 702464       |
| train/                  |              |
|    approx_kl            | 0.0009552397 |
|    clip_fraction        | 0.0041       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.177       |
|    explained_variance   | 0.00172      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0111       |
|    n_updates            | 6850         |
|    policy_gradient_loss | -0.000822    |
|    reward               | 0.0          |
|    value_loss           | 0.0256       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.842         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 687           |
|    time_elapsed         | 4188          |
|    total_timesteps      | 703488        |
| train/                  |               |
|    approx_kl            | 0.00035936735 |
|    clip_fraction        | 0.0142        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.185        |
|    explained_variance   | 0.000249      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0203        |
|    n_updates            | 6860          |
|    policy_gradient_loss | 0.000645      |
|    reward               | 0.024661444   |
|    value_loss           | 0.0278        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.838         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 688           |
|    time_elapsed         | 4194          |
|    total_timesteps      | 704512        |
| train/                  |               |
|    approx_kl            | 0.00063997216 |
|    clip_fraction        | 0.0141        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.184        |
|    explained_variance   | 0.000523      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00832       |
|    n_updates            | 6870          |
|    policy_gradient_loss | -0.00114      |
|    reward               | 0.020272586   |
|    value_loss           | 0.0275        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.846         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 689           |
|    time_elapsed         | 4201          |
|    total_timesteps      | 705536        |
| train/                  |               |
|    approx_kl            | 0.00042392605 |
|    clip_fraction        | 0.00693       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.202        |
|    explained_variance   | 0.000801      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00331      |
|    n_updates            | 6880          |
|    policy_gradient_loss | -0.000302     |
|    reward               | 0.009486683   |
|    value_loss           | 0.0253        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.838         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 690           |
|    time_elapsed         | 4207          |
|    total_timesteps      | 706560        |
| train/                  |               |
|    approx_kl            | 0.00051877345 |
|    clip_fraction        | 0.00762       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | 0.000214      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0126        |
|    n_updates            | 6890          |
|    policy_gradient_loss | 0.000506      |
|    reward               | 0.006243042   |
|    value_loss           | 0.0286        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 691          |
|    time_elapsed         | 4214         |
|    total_timesteps      | 707584       |
| train/                  |              |
|    approx_kl            | 0.0011231798 |
|    clip_fraction        | 0.0184       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.23        |
|    explained_variance   | 0.000569     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00799      |
|    n_updates            | 6900         |
|    policy_gradient_loss | -0.00143     |
|    reward               | 0.01136343   |
|    value_loss           | 0.0231       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.835         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 694           |
|    time_elapsed         | 4232          |
|    total_timesteps      | 710656        |
| train/                  |               |
|    approx_kl            | 0.00046139467 |
|    clip_fraction        | 0.0193        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.247        |
|    explained_variance   | 0.000348      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0043        |
|    n_updates            | 6930          |
|    policy_gradient_loss | -2.45e-05     |
|    reward               | -0.022170011  |
|    value_loss           | 0.0239        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.836       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 695         |
|    time_elapsed         | 4238        |
|    total_timesteps      | 711680      |
| train/                  |             |
|    approx_kl            | 0.003637582 |
|    clip_fraction        | 0.0358      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.305      |
|    explained_variance   | 0.00057     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00517     |
|    n_updates            | 6940        |
|    policy_gradient_loss | -0.00122    |
|    reward               | 0.0         |
|    value_loss           | 0.0319      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 696          |
|    time_elapsed         | 4244         |
|    total_timesteps      | 712704       |
| train/                  |              |
|    approx_kl            | 0.0018301948 |
|    clip_fraction        | 0.0134       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.281       |
|    explained_variance   | 0.0008       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00429      |
|    n_updates            | 6950         |
|    policy_gradient_loss | -0.00211     |
|    reward               | 0.010229663  |
|    value_loss           | 0.0223       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 697          |
|    time_elapsed         | 4250         |
|    total_timesteps      | 713728       |
| train/                  |              |
|    approx_kl            | 0.0007351142 |
|    clip_fraction        | 0.00918      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.252       |
|    explained_variance   | 0.00116      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00284      |
|    n_updates            | 6960         |
|    policy_gradient_loss | -0.00109     |
|    reward               | 0.003524312  |
|    value_loss           | 0.0179       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.832         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 698           |
|    time_elapsed         | 4257          |
|    total_timesteps      | 714752        |
| train/                  |               |
|    approx_kl            | 0.00072225946 |
|    clip_fraction        | 0.00937       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.215        |
|    explained_variance   | 0.00115       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00398       |
|    n_updates            | 6970          |
|    policy_gradient_loss | -0.00161      |
|    reward               | -0.008069178  |
|    value_loss           | 0.0235        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.838       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 699         |
|    time_elapsed         | 4263        |
|    total_timesteps      | 715776      |
| train/                  |             |
|    approx_kl            | 0.001111198 |
|    clip_fraction        | 0.0129      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.228      |
|    explained_variance   | 0.00087     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.000718    |
|    n_updates            | 6980        |
|    policy_gradient_loss | -0.00123    |
|    reward               | 0.030956924 |
|    value_loss           | 0.0209      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 700          |
|    time_elapsed         | 4270         |
|    total_timesteps      | 716800       |
| train/                  |              |
|    approx_kl            | 0.0008945616 |
|    clip_fraction        | 0.0134       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.193       |
|    explained_variance   | 0.000535     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00122      |
|    n_updates            | 6990         |
|    policy_gradient_loss | -0.00221     |
|    reward               | -0.017151328 |
|    value_loss           | 0.0187       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 701          |
|    time_elapsed         | 4276         |
|    total_timesteps      | 717824       |
| train/                  |              |
|    approx_kl            | 0.0003092538 |
|    clip_fraction        | 0.00566      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.191       |
|    explained_variance   | 0.000106     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000706     |
|    n_updates            | 7000         |
|    policy_gradient_loss | 0.000502     |
|    reward               | -0.013221561 |
|    value_loss           | 0.0243       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 703          |
|    time_elapsed         | 4289         |
|    total_timesteps      | 719872       |
| train/                  |              |
|    approx_kl            | 0.0035501332 |
|    clip_fraction        | 0.0174       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.277       |
|    explained_variance   | 0.000225     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00751      |
|    n_updates            | 7020         |
|    policy_gradient_loss | -0.000328    |
|    reward               | 0.019939959  |
|    value_loss           | 0.0235       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 704          |
|    time_elapsed         | 4295         |
|    total_timesteps      | 720896       |
| train/                  |              |
|    approx_kl            | 0.0011451244 |
|    clip_fraction        | 0.0264       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.337       |
|    explained_variance   | 0.000506     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0103       |
|    n_updates            | 7030         |
|    policy_gradient_loss | -0.000251    |
|    reward               | -0.060896378 |
|    value_loss           | 0.0266       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 706          |
|    time_elapsed         | 4307         |
|    total_timesteps      | 722944       |
| train/                  |              |
|    approx_kl            | 0.0017315402 |
|    clip_fraction        | 0.0256       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.322       |
|    explained_variance   | 0.0011       |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00475     |
|    n_updates            | 7050         |
|    policy_gradient_loss | -0.00231     |
|    reward               | 0.02072862   |
|    value_loss           | 0.0156       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 707          |
|    time_elapsed         | 4313         |
|    total_timesteps      | 723968       |
| train/                  |              |
|    approx_kl            | 0.0009708728 |
|    clip_fraction        | 0.0185       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.34        |
|    explained_variance   | 0.000699     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00906     |
|    n_updates            | 7060         |
|    policy_gradient_loss | -0.00177     |
|    reward               | 0.01796203   |
|    value_loss           | 0.0203       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 708          |
|    time_elapsed         | 4320         |
|    total_timesteps      | 724992       |
| train/                  |              |
|    approx_kl            | 0.0017691171 |
|    clip_fraction        | 0.0143       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.327       |
|    explained_variance   | 0.00103      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0033      |
|    n_updates            | 7070         |
|    policy_gradient_loss | -0.000764    |
|    reward               | 0.0          |
|    value_loss           | 0.0206       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.823        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 709          |
|    time_elapsed         | 4327         |
|    total_timesteps      | 726016       |
| train/                  |              |
|    approx_kl            | 0.0012500586 |
|    clip_fraction        | 0.0296       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.323       |
|    explained_variance   | 0.00138      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0014       |
|    n_updates            | 7080         |
|    policy_gradient_loss | -0.00391     |
|    reward               | 0.0          |
|    value_loss           | 0.0175       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.829         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 710           |
|    time_elapsed         | 4333          |
|    total_timesteps      | 727040        |
| train/                  |               |
|    approx_kl            | 0.00051200006 |
|    clip_fraction        | 0.0141        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.351        |
|    explained_variance   | 0.000947      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00305       |
|    n_updates            | 7090          |
|    policy_gradient_loss | 0.00108       |
|    reward               | 0.03982499    |
|    value_loss           | 0.0242        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.823        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 711          |
|    time_elapsed         | 4339         |
|    total_timesteps      | 728064       |
| train/                  |              |
|    approx_kl            | 0.0005770787 |
|    clip_fraction        | 0.0085       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.345       |
|    explained_variance   | 0.001        |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00328     |
|    n_updates            | 7100         |
|    policy_gradient_loss | -0.000677    |
|    reward               | 0.0          |
|    value_loss           | 0.0236       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.829         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 713           |
|    time_elapsed         | 4353          |
|    total_timesteps      | 730112        |
| train/                  |               |
|    approx_kl            | 0.00063965074 |
|    clip_fraction        | 0.00645       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.335        |
|    explained_variance   | 0.000925      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00944       |
|    n_updates            | 7120          |
|    policy_gradient_loss | 0.000575      |
|    reward               | 0.065360025   |
|    value_loss           | 0.0166        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.82         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 714          |
|    time_elapsed         | 4359         |
|    total_timesteps      | 731136       |
| train/                  |              |
|    approx_kl            | 0.0022572135 |
|    clip_fraction        | 0.0313       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.31        |
|    explained_variance   | 0.00132      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0146      |
|    n_updates            | 7130         |
|    policy_gradient_loss | -0.00478     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0171       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.808        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 715          |
|    time_elapsed         | 4365         |
|    total_timesteps      | 732160       |
| train/                  |              |
|    approx_kl            | 0.0005865009 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.307       |
|    explained_variance   | 0.000429     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00184      |
|    n_updates            | 7140         |
|    policy_gradient_loss | -0.000357    |
|    reward               | 0.013648197  |
|    value_loss           | 0.0206       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 716          |
|    time_elapsed         | 4371         |
|    total_timesteps      | 733184       |
| train/                  |              |
|    approx_kl            | 0.0010805104 |
|    clip_fraction        | 0.0151       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.298       |
|    explained_variance   | 0.000866     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00932     |
|    n_updates            | 7150         |
|    policy_gradient_loss | -0.00106     |
|    reward               | 0.06002146   |
|    value_loss           | 0.0191       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.811        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 717          |
|    time_elapsed         | 4377         |
|    total_timesteps      | 734208       |
| train/                  |              |
|    approx_kl            | 0.0007140938 |
|    clip_fraction        | 0.00742      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.292       |
|    explained_variance   | 0.00114      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00476     |
|    n_updates            | 7160         |
|    policy_gradient_loss | -0.000551    |
|    reward               | 0.04515541   |
|    value_loss           | 0.0181       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.817         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 718           |
|    time_elapsed         | 4383          |
|    total_timesteps      | 735232        |
| train/                  |               |
|    approx_kl            | 0.00016225118 |
|    clip_fraction        | 0.00234       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.28         |
|    explained_variance   | 0.00218       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000461      |
|    n_updates            | 7170          |
|    policy_gradient_loss | 6.43e-05      |
|    reward               | 0.0           |
|    value_loss           | 0.0211        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.821        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 719          |
|    time_elapsed         | 4389         |
|    total_timesteps      | 736256       |
| train/                  |              |
|    approx_kl            | 0.0002614865 |
|    clip_fraction        | 0.0137       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.293       |
|    explained_variance   | 0.00108      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0019      |
|    n_updates            | 7180         |
|    policy_gradient_loss | 0.00116      |
|    reward               | 0.0          |
|    value_loss           | 0.0249       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.818        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 720          |
|    time_elapsed         | 4395         |
|    total_timesteps      | 737280       |
| train/                  |              |
|    approx_kl            | 0.0011748006 |
|    clip_fraction        | 0.021        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.286       |
|    explained_variance   | 0.000607     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00333     |
|    n_updates            | 7190         |
|    policy_gradient_loss | -0.00232     |
|    reward               | -0.00020002  |
|    value_loss           | 0.02         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.815        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 721          |
|    time_elapsed         | 4401         |
|    total_timesteps      | 738304       |
| train/                  |              |
|    approx_kl            | 0.0010429684 |
|    clip_fraction        | 0.0128       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.262       |
|    explained_variance   | 0.000416     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0016       |
|    n_updates            | 7200         |
|    policy_gradient_loss | -0.00183     |
|    reward               | 0.006686396  |
|    value_loss           | 0.0237       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.824       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 723         |
|    time_elapsed         | 4413        |
|    total_timesteps      | 740352      |
| train/                  |             |
|    approx_kl            | 0.000559518 |
|    clip_fraction        | 0.0186      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.271      |
|    explained_variance   | 0.000939    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0039     |
|    n_updates            | 7220        |
|    policy_gradient_loss | 0.000271    |
|    reward               | 0.0         |
|    value_loss           | 0.0203      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.825        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 724          |
|    time_elapsed         | 4419         |
|    total_timesteps      | 741376       |
| train/                  |              |
|    approx_kl            | 0.0009001748 |
|    clip_fraction        | 0.0128       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 0.00268      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0127       |
|    n_updates            | 7230         |
|    policy_gradient_loss | -0.00203     |
|    reward               | 0.0          |
|    value_loss           | 0.0212       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.823         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 725           |
|    time_elapsed         | 4426          |
|    total_timesteps      | 742400        |
| train/                  |               |
|    approx_kl            | 0.00052640616 |
|    clip_fraction        | 0.00381       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.24         |
|    explained_variance   | 0.002         |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0123        |
|    n_updates            | 7240          |
|    policy_gradient_loss | 0.000459      |
|    reward               | 0.0018058756  |
|    value_loss           | 0.027         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.812        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 726          |
|    time_elapsed         | 4432         |
|    total_timesteps      | 743424       |
| train/                  |              |
|    approx_kl            | 0.0008757136 |
|    clip_fraction        | 0.00664      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.228       |
|    explained_variance   | 0.000863     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00585     |
|    n_updates            | 7250         |
|    policy_gradient_loss | -0.00201     |
|    reward               | 0.0          |
|    value_loss           | 0.0274       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.812       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 727         |
|    time_elapsed         | 4438        |
|    total_timesteps      | 744448      |
| train/                  |             |
|    approx_kl            | 0.000964817 |
|    clip_fraction        | 0.0177      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.226      |
|    explained_variance   | 0.00117     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.000116    |
|    n_updates            | 7260        |
|    policy_gradient_loss | -0.000644   |
|    reward               | 0.01513737  |
|    value_loss           | 0.0216      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.803        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 728          |
|    time_elapsed         | 4444         |
|    total_timesteps      | 745472       |
| train/                  |              |
|    approx_kl            | 0.0011564037 |
|    clip_fraction        | 0.0144       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.214       |
|    explained_variance   | 0.000942     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00231     |
|    n_updates            | 7270         |
|    policy_gradient_loss | -0.00272     |
|    reward               | 0.0          |
|    value_loss           | 0.0205       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 729          |
|    time_elapsed         | 4451         |
|    total_timesteps      | 746496       |
| train/                  |              |
|    approx_kl            | 0.0003878355 |
|    clip_fraction        | 0.00898      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.244       |
|    explained_variance   | 0.000628     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00258     |
|    n_updates            | 7280         |
|    policy_gradient_loss | 0.000305     |
|    reward               | -0.021743488 |
|    value_loss           | 0.0253       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.807        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 730          |
|    time_elapsed         | 4457         |
|    total_timesteps      | 747520       |
| train/                  |              |
|    approx_kl            | 0.0009412012 |
|    clip_fraction        | 0.0218       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.262       |
|    explained_variance   | 0.00121      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0143       |
|    n_updates            | 7290         |
|    policy_gradient_loss | 0.00035      |
|    reward               | 0.018004976  |
|    value_loss           | 0.0272       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.806        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 731          |
|    time_elapsed         | 4463         |
|    total_timesteps      | 748544       |
| train/                  |              |
|    approx_kl            | 0.0006246969 |
|    clip_fraction        | 0.00205      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.259       |
|    explained_variance   | 0.00082      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00674     |
|    n_updates            | 7300         |
|    policy_gradient_loss | 0.000251     |
|    reward               | 0.041915983  |
|    value_loss           | 0.023        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.807        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 732          |
|    time_elapsed         | 4469         |
|    total_timesteps      | 749568       |
| train/                  |              |
|    approx_kl            | 0.0013942005 |
|    clip_fraction        | 0.0201       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.234       |
|    explained_variance   | 0.00122      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0122      |
|    n_updates            | 7310         |
|    policy_gradient_loss | -0.00303     |
|    reward               | 0.0          |
|    value_loss           | 0.0183       |
------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1e+03          |
|    ep_rew_mean          | 0.807          |
| time/                   |                |
|    fps                  | 167            |
|    iterations           | 733            |
|    time_elapsed         | 4475           |
|    total_timesteps      | 750592         |
| train/                  |                |
|    approx_kl            | 0.00056729955  |
|    clip_fraction        | 0.0127         |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.203         |
|    explained_variance   | 0.00104        |
|    learning_rate        | 0.0002         |
|    loss                 | 0.00716        |
|    n_updates            | 7320           |
|    policy_gradient_loss | -0.00266       |
|    reward               | -0.00090318255 |
|    value_loss           | 0.0245         |
--------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.8          |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 734          |
|    time_elapsed         | 4481         |
|    total_timesteps      | 751616       |
| train/                  |              |
|    approx_kl            | 0.0007878356 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.195       |
|    explained_variance   | 2.72e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0106       |
|    n_updates            | 7330         |
|    policy_gradient_loss | -0.00103     |
|    reward               | 0.0          |
|    value_loss           | 0.0287       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.803         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 735           |
|    time_elapsed         | 4487          |
|    total_timesteps      | 752640        |
| train/                  |               |
|    approx_kl            | 0.00029331504 |
|    clip_fraction        | 0.00732       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.222        |
|    explained_variance   | 0.000301      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0243        |
|    n_updates            | 7340          |
|    policy_gradient_loss | 0.0006        |
|    reward               | 0.0           |
|    value_loss           | 0.028         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.806        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 736          |
|    time_elapsed         | 4493         |
|    total_timesteps      | 753664       |
| train/                  |              |
|    approx_kl            | 0.0007591564 |
|    clip_fraction        | 0.0128       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.254       |
|    explained_variance   | 0.000237     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.001       |
|    n_updates            | 7350         |
|    policy_gradient_loss | 1.19e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.0271       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 737          |
|    time_elapsed         | 4501         |
|    total_timesteps      | 754688       |
| train/                  |              |
|    approx_kl            | 0.0012583997 |
|    clip_fraction        | 0.018        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.00173      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00541      |
|    n_updates            | 7360         |
|    policy_gradient_loss | -0.00228     |
|    reward               | 0.030472565  |
|    value_loss           | 0.0163       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.809        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 738          |
|    time_elapsed         | 4507         |
|    total_timesteps      | 755712       |
| train/                  |              |
|    approx_kl            | 0.0020583791 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.000952     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0152       |
|    n_updates            | 7370         |
|    policy_gradient_loss | 0.000189     |
|    reward               | 0.015821619  |
|    value_loss           | 0.0245       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.807         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 739           |
|    time_elapsed         | 4513          |
|    total_timesteps      | 756736        |
| train/                  |               |
|    approx_kl            | 0.00034216058 |
|    clip_fraction        | 0.00205       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.274        |
|    explained_variance   | 0.00106       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00143       |
|    n_updates            | 7380          |
|    policy_gradient_loss | 0.000725      |
|    reward               | -0.00020002   |
|    value_loss           | 0.0201        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.807        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 740          |
|    time_elapsed         | 4519         |
|    total_timesteps      | 757760       |
| train/                  |              |
|    approx_kl            | 0.0012847357 |
|    clip_fraction        | 0.00703      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.246       |
|    explained_variance   | 0.000389     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00729     |
|    n_updates            | 7390         |
|    policy_gradient_loss | -0.00138     |
|    reward               | 0.0          |
|    value_loss           | 0.0233       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.798       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 741         |
|    time_elapsed         | 4526        |
|    total_timesteps      | 758784      |
| train/                  |             |
|    approx_kl            | 0.004297848 |
|    clip_fraction        | 0.0114      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.27       |
|    explained_variance   | 0.000425    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00666     |
|    n_updates            | 7400        |
|    policy_gradient_loss | -0.000521   |
|    reward               | 0.0         |
|    value_loss           | 0.0296      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.796        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 742          |
|    time_elapsed         | 4532         |
|    total_timesteps      | 759808       |
| train/                  |              |
|    approx_kl            | 0.0004068393 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.287       |
|    explained_variance   | -0.00126     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.002       |
|    n_updates            | 7410         |
|    policy_gradient_loss | -0.00072     |
|    reward               | 0.066669434  |
|    value_loss           | 0.021        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.791        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 743          |
|    time_elapsed         | 4539         |
|    total_timesteps      | 760832       |
| train/                  |              |
|    approx_kl            | 0.0005668547 |
|    clip_fraction        | 0.0158       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.293       |
|    explained_variance   | -4.05e-06    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00184      |
|    n_updates            | 7420         |
|    policy_gradient_loss | -0.000825    |
|    reward               | -0.1788779   |
|    value_loss           | 0.024        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.788        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 744          |
|    time_elapsed         | 4545         |
|    total_timesteps      | 761856       |
| train/                  |              |
|    approx_kl            | 0.0009213425 |
|    clip_fraction        | 0.00811      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | 0.000205     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00517      |
|    n_updates            | 7430         |
|    policy_gradient_loss | -6.38e-05    |
|    reward               | 0.004212693  |
|    value_loss           | 0.0217       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.786        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 745          |
|    time_elapsed         | 4552         |
|    total_timesteps      | 762880       |
| train/                  |              |
|    approx_kl            | 0.0010655026 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.274       |
|    explained_variance   | 0.000806     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00254      |
|    n_updates            | 7440         |
|    policy_gradient_loss | 8.43e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.0261       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 746          |
|    time_elapsed         | 4559         |
|    total_timesteps      | 763904       |
| train/                  |              |
|    approx_kl            | 0.0010232427 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.272       |
|    explained_variance   | 0.000189     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000144    |
|    n_updates            | 7450         |
|    policy_gradient_loss | -0.000914    |
|    reward               | 0.00128735   |
|    value_loss           | 0.0215       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.775        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 747          |
|    time_elapsed         | 4566         |
|    total_timesteps      | 764928       |
| train/                  |              |
|    approx_kl            | 0.0008955525 |
|    clip_fraction        | 0.0184       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.236       |
|    explained_variance   | 0.000664     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00339      |
|    n_updates            | 7460         |
|    policy_gradient_loss | -0.00302     |
|    reward               | -0.07022179  |
|    value_loss           | 0.0207       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.782       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 748         |
|    time_elapsed         | 4573        |
|    total_timesteps      | 765952      |
| train/                  |             |
|    approx_kl            | 0.001163506 |
|    clip_fraction        | 0.025       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.266      |
|    explained_variance   | 0.000354    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.000158   |
|    n_updates            | 7470        |
|    policy_gradient_loss | 2.87e-05    |
|    reward               | -0.03031474 |
|    value_loss           | 0.0259      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.773        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 749          |
|    time_elapsed         | 4581         |
|    total_timesteps      | 766976       |
| train/                  |              |
|    approx_kl            | 0.0009007508 |
|    clip_fraction        | 0.0169       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.263       |
|    explained_variance   | 0.0007       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00617      |
|    n_updates            | 7480         |
|    policy_gradient_loss | -0.00122     |
|    reward               | 0.008187967  |
|    value_loss           | 0.0229       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.782        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 750          |
|    time_elapsed         | 4587         |
|    total_timesteps      | 768000       |
| train/                  |              |
|    approx_kl            | 0.0017715575 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.00156      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00974      |
|    n_updates            | 7490         |
|    policy_gradient_loss | -0.00365     |
|    reward               | 0.0          |
|    value_loss           | 0.0202       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.778        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 751          |
|    time_elapsed         | 4593         |
|    total_timesteps      | 769024       |
| train/                  |              |
|    approx_kl            | 0.0006313874 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.213       |
|    explained_variance   | 0.00201      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00478      |
|    n_updates            | 7500         |
|    policy_gradient_loss | -0.0022      |
|    reward               | 0.0032445167 |
|    value_loss           | 0.0237       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.78          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 752           |
|    time_elapsed         | 4599          |
|    total_timesteps      | 770048        |
| train/                  |               |
|    approx_kl            | 0.00056240207 |
|    clip_fraction        | 0.0111        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.201        |
|    explained_variance   | 0.000741      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0163        |
|    n_updates            | 7510          |
|    policy_gradient_loss | -0.00175      |
|    reward               | 0.087605774   |
|    value_loss           | 0.0278        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.769         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 753           |
|    time_elapsed         | 4606          |
|    total_timesteps      | 771072        |
| train/                  |               |
|    approx_kl            | 0.00056686526 |
|    clip_fraction        | 0.0146        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.251        |
|    explained_variance   | 0.000718      |
|    learning_rate        | 0.0002        |
|    loss                 | 7.01e-05      |
|    n_updates            | 7520          |
|    policy_gradient_loss | 8.25e-05      |
|    reward               | 0.037993748   |
|    value_loss           | 0.0271        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.758         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 754           |
|    time_elapsed         | 4612          |
|    total_timesteps      | 772096        |
| train/                  |               |
|    approx_kl            | 0.00064356055 |
|    clip_fraction        | 0.0111        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.246        |
|    explained_variance   | 0.00251       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00652      |
|    n_updates            | 7530          |
|    policy_gradient_loss | -0.00154      |
|    reward               | 0.017359046   |
|    value_loss           | 0.019         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.754        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 755          |
|    time_elapsed         | 4618         |
|    total_timesteps      | 773120       |
| train/                  |              |
|    approx_kl            | 0.0007550519 |
|    clip_fraction        | 0.00615      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.21        |
|    explained_variance   | 0.00103      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00855      |
|    n_updates            | 7540         |
|    policy_gradient_loss | -0.00113     |
|    reward               | 0.012608634  |
|    value_loss           | 0.0218       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 756          |
|    time_elapsed         | 4624         |
|    total_timesteps      | 774144       |
| train/                  |              |
|    approx_kl            | 0.0012820009 |
|    clip_fraction        | 0.0259       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.237       |
|    explained_variance   | 0.00178      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0106       |
|    n_updates            | 7550         |
|    policy_gradient_loss | -0.000486    |
|    reward               | 0.0          |
|    value_loss           | 0.0238       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.748        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 757          |
|    time_elapsed         | 4629         |
|    total_timesteps      | 775168       |
| train/                  |              |
|    approx_kl            | 0.0009057471 |
|    clip_fraction        | 0.0151       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.214       |
|    explained_variance   | 0.000473     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00889      |
|    n_updates            | 7560         |
|    policy_gradient_loss | -0.00256     |
|    reward               | 0.14572738   |
|    value_loss           | 0.0266       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 758          |
|    time_elapsed         | 4635         |
|    total_timesteps      | 776192       |
| train/                  |              |
|    approx_kl            | 0.0010302995 |
|    clip_fraction        | 0.0136       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 0.00072      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0252       |
|    n_updates            | 7570         |
|    policy_gradient_loss | 0.000379     |
|    reward               | 0.0          |
|    value_loss           | 0.0244       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.754       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 759         |
|    time_elapsed         | 4641        |
|    total_timesteps      | 777216      |
| train/                  |             |
|    approx_kl            | 0.000893404 |
|    clip_fraction        | 0.015       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.252      |
|    explained_variance   | 0.00089     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00792     |
|    n_updates            | 7580        |
|    policy_gradient_loss | -0.000634   |
|    reward               | 0.0         |
|    value_loss           | 0.0222      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.762        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 760          |
|    time_elapsed         | 4647         |
|    total_timesteps      | 778240       |
| train/                  |              |
|    approx_kl            | 0.0005955057 |
|    clip_fraction        | 0.00479      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.261       |
|    explained_variance   | 0.00108      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0118       |
|    n_updates            | 7590         |
|    policy_gradient_loss | 0.000813     |
|    reward               | -0.02139261  |
|    value_loss           | 0.0264       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.759        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 761          |
|    time_elapsed         | 4653         |
|    total_timesteps      | 779264       |
| train/                  |              |
|    approx_kl            | 0.0003623828 |
|    clip_fraction        | 0.0042       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.248       |
|    explained_variance   | 0.000935     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00528      |
|    n_updates            | 7600         |
|    policy_gradient_loss | -0.000992    |
|    reward               | 0.0          |
|    value_loss           | 0.0164       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.743        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 762          |
|    time_elapsed         | 4660         |
|    total_timesteps      | 780288       |
| train/                  |              |
|    approx_kl            | 0.0011647743 |
|    clip_fraction        | 0.021        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 2.99e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00367      |
|    n_updates            | 7610         |
|    policy_gradient_loss | -0.00251     |
|    reward               | 0.0030836784 |
|    value_loss           | 0.0246       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.736        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 763          |
|    time_elapsed         | 4665         |
|    total_timesteps      | 781312       |
| train/                  |              |
|    approx_kl            | 0.0005242749 |
|    clip_fraction        | 0.00586      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | 0.00067      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00364      |
|    n_updates            | 7620         |
|    policy_gradient_loss | 0.00138      |
|    reward               | -0.41545123  |
|    value_loss           | 0.0218       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.738        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 764          |
|    time_elapsed         | 4671         |
|    total_timesteps      | 782336       |
| train/                  |              |
|    approx_kl            | 0.0013669035 |
|    clip_fraction        | 0.0235       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.273       |
|    explained_variance   | 0.00109      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0144      |
|    n_updates            | 7630         |
|    policy_gradient_loss | -0.00244     |
|    reward               | 0.105861455  |
|    value_loss           | 0.0211       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.74          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 765           |
|    time_elapsed         | 4677          |
|    total_timesteps      | 783360        |
| train/                  |               |
|    approx_kl            | 0.00080243126 |
|    clip_fraction        | 0.00166       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.269        |
|    explained_variance   | 0.000678      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00699       |
|    n_updates            | 7640          |
|    policy_gradient_loss | 0.000228      |
|    reward               | 0.0           |
|    value_loss           | 0.0223        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.739         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 766           |
|    time_elapsed         | 4683          |
|    total_timesteps      | 784384        |
| train/                  |               |
|    approx_kl            | 0.00012346904 |
|    clip_fraction        | 0.00352       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.259        |
|    explained_variance   | 0.000662      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00457      |
|    n_updates            | 7650          |
|    policy_gradient_loss | 0.00107       |
|    reward               | 0.0           |
|    value_loss           | 0.0268        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.732        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 767          |
|    time_elapsed         | 4689         |
|    total_timesteps      | 785408       |
| train/                  |              |
|    approx_kl            | 0.0008755169 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 0.000385     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00421      |
|    n_updates            | 7660         |
|    policy_gradient_loss | -0.00183     |
|    reward               | 0.0          |
|    value_loss           | 0.0182       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.726        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 768          |
|    time_elapsed         | 4695         |
|    total_timesteps      | 786432       |
| train/                  |              |
|    approx_kl            | 0.0010023885 |
|    clip_fraction        | 0.012        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.252       |
|    explained_variance   | 0.00065      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00777      |
|    n_updates            | 7670         |
|    policy_gradient_loss | 0.000324     |
|    reward               | 0.03260069   |
|    value_loss           | 0.0316       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.741        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 769          |
|    time_elapsed         | 4701         |
|    total_timesteps      | 787456       |
| train/                  |              |
|    approx_kl            | 0.0011955749 |
|    clip_fraction        | 0.0227       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.262       |
|    explained_variance   | 0.00217      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00104     |
|    n_updates            | 7680         |
|    policy_gradient_loss | -0.00251     |
|    reward               | -0.051566616 |
|    value_loss           | 0.0187       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.739       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 770         |
|    time_elapsed         | 4707        |
|    total_timesteps      | 788480      |
| train/                  |             |
|    approx_kl            | 0.001048153 |
|    clip_fraction        | 0.0128      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.288      |
|    explained_variance   | 0.00047     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.014       |
|    n_updates            | 7690        |
|    policy_gradient_loss | 0.000698    |
|    reward               | 0.021755122 |
|    value_loss           | 0.0215      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.743        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 771          |
|    time_elapsed         | 4713         |
|    total_timesteps      | 789504       |
| train/                  |              |
|    approx_kl            | 0.0006717529 |
|    clip_fraction        | 0.0129       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | 0.000691     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00549     |
|    n_updates            | 7700         |
|    policy_gradient_loss | -0.00229     |
|    reward               | 0.0          |
|    value_loss           | 0.02         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 772          |
|    time_elapsed         | 4719         |
|    total_timesteps      | 790528       |
| train/                  |              |
|    approx_kl            | 0.0011685628 |
|    clip_fraction        | 0.00957      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.282       |
|    explained_variance   | 0.000361     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00341      |
|    n_updates            | 7710         |
|    policy_gradient_loss | -0.00112     |
|    reward               | 0.025303062  |
|    value_loss           | 0.0246       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 773          |
|    time_elapsed         | 4725         |
|    total_timesteps      | 791552       |
| train/                  |              |
|    approx_kl            | 0.0013855749 |
|    clip_fraction        | 0.0253       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.231       |
|    explained_variance   | 0.000286     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00165      |
|    n_updates            | 7720         |
|    policy_gradient_loss | -0.00403     |
|    reward               | 0.0          |
|    value_loss           | 0.0248       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 774          |
|    time_elapsed         | 4731         |
|    total_timesteps      | 792576       |
| train/                  |              |
|    approx_kl            | 0.0011738373 |
|    clip_fraction        | 0.021        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.225       |
|    explained_variance   | 0.000775     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0034       |
|    n_updates            | 7730         |
|    policy_gradient_loss | -0.00148     |
|    reward               | 0.0          |
|    value_loss           | 0.0209       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.731        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 775          |
|    time_elapsed         | 4736         |
|    total_timesteps      | 793600       |
| train/                  |              |
|    approx_kl            | 0.0009459279 |
|    clip_fraction        | 0.0084       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.231       |
|    explained_variance   | 0.000128     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0055       |
|    n_updates            | 7740         |
|    policy_gradient_loss | 8.2e-05      |
|    reward               | 0.0          |
|    value_loss           | 0.0251       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.73         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 776          |
|    time_elapsed         | 4742         |
|    total_timesteps      | 794624       |
| train/                  |              |
|    approx_kl            | 0.0011700193 |
|    clip_fraction        | 0.0129       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.215       |
|    explained_variance   | 0.000646     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00366      |
|    n_updates            | 7750         |
|    policy_gradient_loss | -0.00135     |
|    reward               | 0.0973588    |
|    value_loss           | 0.0196       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.728         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 777           |
|    time_elapsed         | 4748          |
|    total_timesteps      | 795648        |
| train/                  |               |
|    approx_kl            | 0.00095313706 |
|    clip_fraction        | 0.0131        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.19         |
|    explained_variance   | -0.001        |
|    learning_rate        | 0.0002        |
|    loss                 | 0.000997      |
|    n_updates            | 7760          |
|    policy_gradient_loss | -0.00247      |
|    reward               | 0.0           |
|    value_loss           | 0.0241        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.731         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 778           |
|    time_elapsed         | 4754          |
|    total_timesteps      | 796672        |
| train/                  |               |
|    approx_kl            | 0.00056871993 |
|    clip_fraction        | 0.0192        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | -0.000325     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00943       |
|    n_updates            | 7770          |
|    policy_gradient_loss | -0.000826     |
|    reward               | 0.044028286   |
|    value_loss           | 0.0275        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.728        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 779          |
|    time_elapsed         | 4760         |
|    total_timesteps      | 797696       |
| train/                  |              |
|    approx_kl            | 0.0006816619 |
|    clip_fraction        | 0.0083       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.223       |
|    explained_variance   | 0.000603     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00865      |
|    n_updates            | 7780         |
|    policy_gradient_loss | 0.000587     |
|    reward               | 0.0          |
|    value_loss           | 0.0352       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.725         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 780           |
|    time_elapsed         | 4765          |
|    total_timesteps      | 798720        |
| train/                  |               |
|    approx_kl            | 0.00089611416 |
|    clip_fraction        | 0.00947       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.23         |
|    explained_variance   | 0.000323      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00492       |
|    n_updates            | 7790          |
|    policy_gradient_loss | 0.000769      |
|    reward               | 0.0           |
|    value_loss           | 0.0246        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.721         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 781           |
|    time_elapsed         | 4771          |
|    total_timesteps      | 799744        |
| train/                  |               |
|    approx_kl            | 0.00090517115 |
|    clip_fraction        | 0.00781       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.218        |
|    explained_variance   | 0.000618      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00836      |
|    n_updates            | 7800          |
|    policy_gradient_loss | -0.000533     |
|    reward               | 0.027720762   |
|    value_loss           | 0.0216        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 782           |
|    time_elapsed         | 4777          |
|    total_timesteps      | 800768        |
| train/                  |               |
|    approx_kl            | 0.00085427915 |
|    clip_fraction        | 0.0151        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.203        |
|    explained_variance   | 0.000436      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00995      |
|    n_updates            | 7810          |
|    policy_gradient_loss | -0.00211      |
|    reward               | 0.013439458   |
|    value_loss           | 0.0255        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.711         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 783           |
|    time_elapsed         | 4783          |
|    total_timesteps      | 801792        |
| train/                  |               |
|    approx_kl            | 0.0032544166  |
|    clip_fraction        | 0.0198        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.241        |
|    explained_variance   | 0.000742      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0116       |
|    n_updates            | 7820          |
|    policy_gradient_loss | -0.00016      |
|    reward               | -0.0035141166 |
|    value_loss           | 0.0255        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.711        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 784          |
|    time_elapsed         | 4789         |
|    total_timesteps      | 802816       |
| train/                  |              |
|    approx_kl            | 0.0015473976 |
|    clip_fraction        | 0.027        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.000747     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00745     |
|    n_updates            | 7830         |
|    policy_gradient_loss | -0.00527     |
|    reward               | 0.01717328   |
|    value_loss           | 0.0206       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.716        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 785          |
|    time_elapsed         | 4795         |
|    total_timesteps      | 803840       |
| train/                  |              |
|    approx_kl            | 0.0007638738 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.251       |
|    explained_variance   | -5.03e-05    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.01         |
|    n_updates            | 7840         |
|    policy_gradient_loss | 0.000754     |
|    reward               | 0.0          |
|    value_loss           | 0.0303       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.718        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 786          |
|    time_elapsed         | 4801         |
|    total_timesteps      | 804864       |
| train/                  |              |
|    approx_kl            | 0.0010120553 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.244       |
|    explained_variance   | 0.000371     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00129      |
|    n_updates            | 7850         |
|    policy_gradient_loss | -0.00215     |
|    reward               | 0.0013892913 |
|    value_loss           | 0.0219       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.717       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 787         |
|    time_elapsed         | 4807        |
|    total_timesteps      | 805888      |
| train/                  |             |
|    approx_kl            | 0.001085805 |
|    clip_fraction        | 0.0247      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.221      |
|    explained_variance   | 0.000711    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00107     |
|    n_updates            | 7860        |
|    policy_gradient_loss | -0.00403    |
|    reward               | 0.003648253 |
|    value_loss           | 0.0186      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 788           |
|    time_elapsed         | 4813          |
|    total_timesteps      | 806912        |
| train/                  |               |
|    approx_kl            | 0.00043408136 |
|    clip_fraction        | 0.00361       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.205        |
|    explained_variance   | 0.00111       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00052       |
|    n_updates            | 7870          |
|    policy_gradient_loss | -0.000839     |
|    reward               | 0.0           |
|    value_loss           | 0.018         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 789           |
|    time_elapsed         | 4819          |
|    total_timesteps      | 807936        |
| train/                  |               |
|    approx_kl            | 0.001161426   |
|    clip_fraction        | 0.0226        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.204        |
|    explained_variance   | 0.000561      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0138        |
|    n_updates            | 7880          |
|    policy_gradient_loss | -0.00266      |
|    reward               | 0.00097557774 |
|    value_loss           | 0.0275        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.718         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 790           |
|    time_elapsed         | 4825          |
|    total_timesteps      | 808960        |
| train/                  |               |
|    approx_kl            | 0.00050020224 |
|    clip_fraction        | 0.00781       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.212        |
|    explained_variance   | 0.000532      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00708       |
|    n_updates            | 7890          |
|    policy_gradient_loss | 0.000643      |
|    reward               | 0.01306342    |
|    value_loss           | 0.0299        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.712       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 791         |
|    time_elapsed         | 4831        |
|    total_timesteps      | 809984      |
| train/                  |             |
|    approx_kl            | 0.000643931 |
|    clip_fraction        | 0.0113      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.202      |
|    explained_variance   | 0.000835    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0108     |
|    n_updates            | 7900        |
|    policy_gradient_loss | -0.00168    |
|    reward               | 0.0         |
|    value_loss           | 0.0227      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.707        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 792          |
|    time_elapsed         | 4837         |
|    total_timesteps      | 811008       |
| train/                  |              |
|    approx_kl            | 0.0012402062 |
|    clip_fraction        | 0.016        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.222       |
|    explained_variance   | 0.000606     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0123       |
|    n_updates            | 7910         |
|    policy_gradient_loss | 0.000335     |
|    reward               | 0.0          |
|    value_loss           | 0.0343       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.705        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 793          |
|    time_elapsed         | 4843         |
|    total_timesteps      | 812032       |
| train/                  |              |
|    approx_kl            | 0.0015601777 |
|    clip_fraction        | 0.0314       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.247       |
|    explained_variance   | 0.000291     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00248      |
|    n_updates            | 7920         |
|    policy_gradient_loss | -0.00227     |
|    reward               | 0.0030854463 |
|    value_loss           | 0.0329       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.713        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 794          |
|    time_elapsed         | 4849         |
|    total_timesteps      | 813056       |
| train/                  |              |
|    approx_kl            | 0.0007974735 |
|    clip_fraction        | 0.00898      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.243       |
|    explained_variance   | 0.000466     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0243       |
|    n_updates            | 7930         |
|    policy_gradient_loss | -0.000651    |
|    reward               | 0.0035214715 |
|    value_loss           | 0.0264       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.718         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 795           |
|    time_elapsed         | 4855          |
|    total_timesteps      | 814080        |
| train/                  |               |
|    approx_kl            | 0.00062491134 |
|    clip_fraction        | 0.00381       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.263        |
|    explained_variance   | 0.000709      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00287       |
|    n_updates            | 7940          |
|    policy_gradient_loss | 0.0008        |
|    reward               | 0.02135303    |
|    value_loss           | 0.0236        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.711        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 796          |
|    time_elapsed         | 4861         |
|    total_timesteps      | 815104       |
| train/                  |              |
|    approx_kl            | 0.0012699184 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.259       |
|    explained_variance   | -0.000246    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00301      |
|    n_updates            | 7950         |
|    policy_gradient_loss | -0.00235     |
|    reward               | 0.0          |
|    value_loss           | 0.0176       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.711        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 797          |
|    time_elapsed         | 4867         |
|    total_timesteps      | 816128       |
| train/                  |              |
|    approx_kl            | 0.0010855695 |
|    clip_fraction        | 0.0198       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.000124     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00332      |
|    n_updates            | 7960         |
|    policy_gradient_loss | -0.00335     |
|    reward               | 0.0032278185 |
|    value_loss           | 0.0202       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.713        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 798          |
|    time_elapsed         | 4873         |
|    total_timesteps      | 817152       |
| train/                  |              |
|    approx_kl            | 0.0004884794 |
|    clip_fraction        | 0.0124       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.255       |
|    explained_variance   | 0.000798     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00302      |
|    n_updates            | 7970         |
|    policy_gradient_loss | 0.000633     |
|    reward               | 0.0          |
|    value_loss           | 0.0254       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.713        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 799          |
|    time_elapsed         | 4879         |
|    total_timesteps      | 818176       |
| train/                  |              |
|    approx_kl            | 0.0006798431 |
|    clip_fraction        | 0.00908      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.272       |
|    explained_variance   | 0.000683     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00584      |
|    n_updates            | 7980         |
|    policy_gradient_loss | 0.000938     |
|    reward               | 0.0          |
|    value_loss           | 0.0265       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.719       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 800         |
|    time_elapsed         | 4885        |
|    total_timesteps      | 819200      |
| train/                  |             |
|    approx_kl            | 0.001777706 |
|    clip_fraction        | 0.0256      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.318      |
|    explained_variance   | 0.000813    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.012       |
|    n_updates            | 7990        |
|    policy_gradient_loss | 0.000207    |
|    reward               | -0.00273285 |
|    value_loss           | 0.0273      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.735        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 801          |
|    time_elapsed         | 4891         |
|    total_timesteps      | 820224       |
| train/                  |              |
|    approx_kl            | 0.0013585959 |
|    clip_fraction        | 0.0315       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.354       |
|    explained_variance   | 0.00101      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0019      |
|    n_updates            | 8000         |
|    policy_gradient_loss | -0.000106    |
|    reward               | 0.0          |
|    value_loss           | 0.0176       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.747        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 802          |
|    time_elapsed         | 4897         |
|    total_timesteps      | 821248       |
| train/                  |              |
|    approx_kl            | 0.0017440028 |
|    clip_fraction        | 0.0186       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.313       |
|    explained_variance   | 0.00169      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0126      |
|    n_updates            | 8010         |
|    policy_gradient_loss | -0.0041      |
|    reward               | -0.005348031 |
|    value_loss           | 0.0123       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.731         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 803           |
|    time_elapsed         | 4903          |
|    total_timesteps      | 822272        |
| train/                  |               |
|    approx_kl            | 0.00012695423 |
|    clip_fraction        | 0.004         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.319        |
|    explained_variance   | -0.000326     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00458       |
|    n_updates            | 8020          |
|    policy_gradient_loss | 0.00126       |
|    reward               | 0.0           |
|    value_loss           | 0.02          |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.725         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 804           |
|    time_elapsed         | 4909          |
|    total_timesteps      | 823296        |
| train/                  |               |
|    approx_kl            | 0.00093271123 |
|    clip_fraction        | 0.0192        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.282        |
|    explained_variance   | 0.000293      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00166       |
|    n_updates            | 8030          |
|    policy_gradient_loss | -0.00366      |
|    reward               | 0.019222468   |
|    value_loss           | 0.0167        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.727        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 805          |
|    time_elapsed         | 4915         |
|    total_timesteps      | 824320       |
| train/                  |              |
|    approx_kl            | 0.0013137341 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.318       |
|    explained_variance   | 0.000444     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00992      |
|    n_updates            | 8040         |
|    policy_gradient_loss | 0.000541     |
|    reward               | -0.055132    |
|    value_loss           | 0.0302       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.717        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 806          |
|    time_elapsed         | 4920         |
|    total_timesteps      | 825344       |
| train/                  |              |
|    approx_kl            | 0.0017835326 |
|    clip_fraction        | 0.0226       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.263       |
|    explained_variance   | 0.000668     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00449     |
|    n_updates            | 8050         |
|    policy_gradient_loss | -0.00361     |
|    reward               | 0.0          |
|    value_loss           | 0.0184       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.734        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 807          |
|    time_elapsed         | 4926         |
|    total_timesteps      | 826368       |
| train/                  |              |
|    approx_kl            | 0.0011286946 |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.255       |
|    explained_variance   | 0.00066      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00146      |
|    n_updates            | 8060         |
|    policy_gradient_loss | -0.00102     |
|    reward               | 0.007129884  |
|    value_loss           | 0.023        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.739         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 808           |
|    time_elapsed         | 4932          |
|    total_timesteps      | 827392        |
| train/                  |               |
|    approx_kl            | 0.00060586364 |
|    clip_fraction        | 0.00947       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.278        |
|    explained_variance   | 0.000222      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00325       |
|    n_updates            | 8070          |
|    policy_gradient_loss | 0.000439      |
|    reward               | 0.022694249   |
|    value_loss           | 0.0207        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.74         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 809          |
|    time_elapsed         | 4939         |
|    total_timesteps      | 828416       |
| train/                  |              |
|    approx_kl            | 0.0015602099 |
|    clip_fraction        | 0.0199       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.000962     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00123      |
|    n_updates            | 8080         |
|    policy_gradient_loss | -0.00248     |
|    reward               | 0.05913333   |
|    value_loss           | 0.0155       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.737        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 810          |
|    time_elapsed         | 4944         |
|    total_timesteps      | 829440       |
| train/                  |              |
|    approx_kl            | 0.0013946916 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.235       |
|    explained_variance   | 0.000218     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00786     |
|    n_updates            | 8090         |
|    policy_gradient_loss | -0.00332     |
|    reward               | -0.03889834  |
|    value_loss           | 0.022        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.74         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 811          |
|    time_elapsed         | 4950         |
|    total_timesteps      | 830464       |
| train/                  |              |
|    approx_kl            | 0.0011633675 |
|    clip_fraction        | 0.0243       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.27        |
|    explained_variance   | 0.000266     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0106       |
|    n_updates            | 8100         |
|    policy_gradient_loss | 5.09e-05     |
|    reward               | 0.084651016  |
|    value_loss           | 0.0248       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 812          |
|    time_elapsed         | 4956         |
|    total_timesteps      | 831488       |
| train/                  |              |
|    approx_kl            | 0.0006423084 |
|    clip_fraction        | 0.013        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.287       |
|    explained_variance   | 0.00079      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00488     |
|    n_updates            | 8110         |
|    policy_gradient_loss | -0.000381    |
|    reward               | 0.0011201537 |
|    value_loss           | 0.0215       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.735       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 813         |
|    time_elapsed         | 4962        |
|    total_timesteps      | 832512      |
| train/                  |             |
|    approx_kl            | 0.001448336 |
|    clip_fraction        | 0.0253      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.246      |
|    explained_variance   | 0.00128     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.000195    |
|    n_updates            | 8120        |
|    policy_gradient_loss | -0.00385    |
|    reward               | 0.033532716 |
|    value_loss           | 0.0227      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 814          |
|    time_elapsed         | 4967         |
|    total_timesteps      | 833536       |
| train/                  |              |
|    approx_kl            | 0.0006535455 |
|    clip_fraction        | 0.0104       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.245       |
|    explained_variance   | 0.00109      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00652      |
|    n_updates            | 8130         |
|    policy_gradient_loss | -0.000257    |
|    reward               | 0.01668691   |
|    value_loss           | 0.0294       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.73        |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 815         |
|    time_elapsed         | 4973        |
|    total_timesteps      | 834560      |
| train/                  |             |
|    approx_kl            | 0.000527664 |
|    clip_fraction        | 0.00898     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.263      |
|    explained_variance   | 0.000385    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00437     |
|    n_updates            | 8140        |
|    policy_gradient_loss | 0.00024     |
|    reward               | 0.0         |
|    value_loss           | 0.0232      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.721         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 816           |
|    time_elapsed         | 4979          |
|    total_timesteps      | 835584        |
| train/                  |               |
|    approx_kl            | 0.00011070294 |
|    clip_fraction        | 0.00264       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.238        |
|    explained_variance   | 0.000732      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000588     |
|    n_updates            | 8150          |
|    policy_gradient_loss | -0.000603     |
|    reward               | -0.06994249   |
|    value_loss           | 0.0226        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.717        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 817          |
|    time_elapsed         | 4985         |
|    total_timesteps      | 836608       |
| train/                  |              |
|    approx_kl            | 0.0006760905 |
|    clip_fraction        | 0.00215      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.23        |
|    explained_variance   | -0.000109    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00247     |
|    n_updates            | 8160         |
|    policy_gradient_loss | -0.000236    |
|    reward               | 0.031781416  |
|    value_loss           | 0.0272       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.719        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 818          |
|    time_elapsed         | 4991         |
|    total_timesteps      | 837632       |
| train/                  |              |
|    approx_kl            | 0.0009856961 |
|    clip_fraction        | 0.00781      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.000433     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0106      |
|    n_updates            | 8170         |
|    policy_gradient_loss | -0.000808    |
|    reward               | 0.0          |
|    value_loss           | 0.0253       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.721         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 819           |
|    time_elapsed         | 4997          |
|    total_timesteps      | 838656        |
| train/                  |               |
|    approx_kl            | 0.00057494035 |
|    clip_fraction        | 0.00605       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.235        |
|    explained_variance   | 0.000151      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0063        |
|    n_updates            | 8180          |
|    policy_gradient_loss | -5.65e-05     |
|    reward               | -0.019577704  |
|    value_loss           | 0.0253        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.72         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 820          |
|    time_elapsed         | 5003         |
|    total_timesteps      | 839680       |
| train/                  |              |
|    approx_kl            | 0.0005530133 |
|    clip_fraction        | 0.00771      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.000576     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00629      |
|    n_updates            | 8190         |
|    policy_gradient_loss | -0.000197    |
|    reward               | 0.0404546    |
|    value_loss           | 0.0298       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 821          |
|    time_elapsed         | 5009         |
|    total_timesteps      | 840704       |
| train/                  |              |
|    approx_kl            | 0.0010226555 |
|    clip_fraction        | 0.0127       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.216       |
|    explained_variance   | 0.000825     |
|    learning_rate        | 0.0002       |
|    loss                 | 2.28e-05     |
|    n_updates            | 8200         |
|    policy_gradient_loss | -0.00125     |
|    reward               | -0.1283548   |
|    value_loss           | 0.0211       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.707        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 822          |
|    time_elapsed         | 5015         |
|    total_timesteps      | 841728       |
| train/                  |              |
|    approx_kl            | 0.0013621172 |
|    clip_fraction        | 0.0133       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.00039      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00649      |
|    n_updates            | 8210         |
|    policy_gradient_loss | -0.00213     |
|    reward               | 0.010281507  |
|    value_loss           | 0.0211       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.703        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 823          |
|    time_elapsed         | 5021         |
|    total_timesteps      | 842752       |
| train/                  |              |
|    approx_kl            | 0.0010074324 |
|    clip_fraction        | 0.0217       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.204       |
|    explained_variance   | 0.000255     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000824     |
|    n_updates            | 8220         |
|    policy_gradient_loss | -0.00118     |
|    reward               | 0.06542992   |
|    value_loss           | 0.018        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.701        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 824          |
|    time_elapsed         | 5027         |
|    total_timesteps      | 843776       |
| train/                  |              |
|    approx_kl            | 0.0003424785 |
|    clip_fraction        | 0.00566      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.204       |
|    explained_variance   | 0.000601     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00538      |
|    n_updates            | 8230         |
|    policy_gradient_loss | 0.000476     |
|    reward               | -0.005643383 |
|    value_loss           | 0.0285       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.709         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 825           |
|    time_elapsed         | 5033          |
|    total_timesteps      | 844800        |
| train/                  |               |
|    approx_kl            | 0.00078569533 |
|    clip_fraction        | 0.00557       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.217        |
|    explained_variance   | -0.000122     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0178        |
|    n_updates            | 8240          |
|    policy_gradient_loss | 0.000674      |
|    reward               | 0.0           |
|    value_loss           | 0.0302        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.718        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 826          |
|    time_elapsed         | 5039         |
|    total_timesteps      | 845824       |
| train/                  |              |
|    approx_kl            | 0.0010096743 |
|    clip_fraction        | 0.0113       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.202       |
|    explained_variance   | 0.000277     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0135       |
|    n_updates            | 8250         |
|    policy_gradient_loss | -0.00198     |
|    reward               | 0.04195298   |
|    value_loss           | 0.0271       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 827           |
|    time_elapsed         | 5045          |
|    total_timesteps      | 846848        |
| train/                  |               |
|    approx_kl            | 0.00076171226 |
|    clip_fraction        | 0.00918       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.203        |
|    explained_variance   | 0.000409      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0013        |
|    n_updates            | 8260          |
|    policy_gradient_loss | -0.000415     |
|    reward               | 0.012621582   |
|    value_loss           | 0.0249        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.72         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 828          |
|    time_elapsed         | 5051         |
|    total_timesteps      | 847872       |
| train/                  |              |
|    approx_kl            | 0.0010473656 |
|    clip_fraction        | 0.0202       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.000677     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00462      |
|    n_updates            | 8270         |
|    policy_gradient_loss | -0.000325    |
|    reward               | 0.0032038828 |
|    value_loss           | 0.0287       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 829           |
|    time_elapsed         | 5057          |
|    total_timesteps      | 848896        |
| train/                  |               |
|    approx_kl            | 0.00035854016 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.209        |
|    explained_variance   | 0.000451      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00516      |
|    n_updates            | 8280          |
|    policy_gradient_loss | 0.000192      |
|    reward               | 0.021453138   |
|    value_loss           | 0.0238        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 830           |
|    time_elapsed         | 5062          |
|    total_timesteps      | 849920        |
| train/                  |               |
|    approx_kl            | 0.00046414044 |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.182        |
|    explained_variance   | 0.00108       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0189        |
|    n_updates            | 8290          |
|    policy_gradient_loss | -0.00162      |
|    reward               | 0.020705013   |
|    value_loss           | 0.0194        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 831           |
|    time_elapsed         | 5068          |
|    total_timesteps      | 850944        |
| train/                  |               |
|    approx_kl            | 0.00056710106 |
|    clip_fraction        | 0.0116        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.152        |
|    explained_variance   | -0.000264     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00591       |
|    n_updates            | 8300          |
|    policy_gradient_loss | -0.00255      |
|    reward               | 0.04106233    |
|    value_loss           | 0.0286        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.719         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 832           |
|    time_elapsed         | 5074          |
|    total_timesteps      | 851968        |
| train/                  |               |
|    approx_kl            | 0.00051063724 |
|    clip_fraction        | 0.011         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.165        |
|    explained_variance   | 0.000343      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00478       |
|    n_updates            | 8310          |
|    policy_gradient_loss | 1.6e-05       |
|    reward               | 0.0033519254  |
|    value_loss           | 0.0251        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 833           |
|    time_elapsed         | 5080          |
|    total_timesteps      | 852992        |
| train/                  |               |
|    approx_kl            | 0.00034505496 |
|    clip_fraction        | 0.00576       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.185        |
|    explained_variance   | 0.000691      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00465       |
|    n_updates            | 8320          |
|    policy_gradient_loss | 0.000228      |
|    reward               | 0.0           |
|    value_loss           | 0.0276        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.721         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 834           |
|    time_elapsed         | 5086          |
|    total_timesteps      | 854016        |
| train/                  |               |
|    approx_kl            | 0.0005934939  |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.192        |
|    explained_variance   | 0.000601      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00384       |
|    n_updates            | 8330          |
|    policy_gradient_loss | 0.000244      |
|    reward               | -0.0028076335 |
|    value_loss           | 0.0343        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 835          |
|    time_elapsed         | 5091         |
|    total_timesteps      | 855040       |
| train/                  |              |
|    approx_kl            | 0.0002942526 |
|    clip_fraction        | 0.00654      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | -0.000888    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00698      |
|    n_updates            | 8340         |
|    policy_gradient_loss | 0.000862     |
|    reward               | 0.0018930598 |
|    value_loss           | 0.0316       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.708         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 836           |
|    time_elapsed         | 5097          |
|    total_timesteps      | 856064        |
| train/                  |               |
|    approx_kl            | 0.00042147154 |
|    clip_fraction        | 0.00889       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.197        |
|    explained_variance   | -0.000296     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00332       |
|    n_updates            | 8350          |
|    policy_gradient_loss | -0.00156      |
|    reward               | 0.073014945   |
|    value_loss           | 0.0257        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 837           |
|    time_elapsed         | 5103          |
|    total_timesteps      | 857088        |
| train/                  |               |
|    approx_kl            | 0.00082441035 |
|    clip_fraction        | 0.00986       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.194        |
|    explained_variance   | 0.000733      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00416      |
|    n_updates            | 8360          |
|    policy_gradient_loss | -0.00134      |
|    reward               | 0.0059077647  |
|    value_loss           | 0.0253        |
-------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 0.712      |
| time/                   |            |
|    fps                  | 167        |
|    iterations           | 838        |
|    time_elapsed         | 5109       |
|    total_timesteps      | 858112     |
| train/                  |            |
|    approx_kl            | 0.00056965 |
|    clip_fraction        | 0.00605    |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.199     |
|    explained_variance   | 0.000404   |
|    learning_rate        | 0.0002     |
|    loss                 | -0.00697   |
|    n_updates            | 8370       |
|    policy_gradient_loss | -0.00023   |
|    reward               | 0.06916324 |
|    value_loss           | 0.0292     |
----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 839           |
|    time_elapsed         | 5115          |
|    total_timesteps      | 859136        |
| train/                  |               |
|    approx_kl            | 0.00042465713 |
|    clip_fraction        | 0.00361       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.182        |
|    explained_variance   | 0.000905      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0196        |
|    n_updates            | 8380          |
|    policy_gradient_loss | -0.00055      |
|    reward               | 0.0           |
|    value_loss           | 0.0248        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.718        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 840          |
|    time_elapsed         | 5121         |
|    total_timesteps      | 860160       |
| train/                  |              |
|    approx_kl            | 0.0009127956 |
|    clip_fraction        | 0.0102       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.167       |
|    explained_variance   | 0.000512     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.008        |
|    n_updates            | 8390         |
|    policy_gradient_loss | -0.00159     |
|    reward               | 0.0          |
|    value_loss           | 0.0317       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 841           |
|    time_elapsed         | 5127          |
|    total_timesteps      | 861184        |
| train/                  |               |
|    approx_kl            | 0.00027297705 |
|    clip_fraction        | 0.00537       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.151        |
|    explained_variance   | 0.000712      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00775       |
|    n_updates            | 8400          |
|    policy_gradient_loss | -0.000719     |
|    reward               | 0.044709466   |
|    value_loss           | 0.0308        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.716         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 842           |
|    time_elapsed         | 5133          |
|    total_timesteps      | 862208        |
| train/                  |               |
|    approx_kl            | 0.00051886286 |
|    clip_fraction        | 0.00908       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.17         |
|    explained_variance   | 0.000363      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0119        |
|    n_updates            | 8410          |
|    policy_gradient_loss | 0.000332      |
|    reward               | 0.0           |
|    value_loss           | 0.0292        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.722         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 843           |
|    time_elapsed         | 5139          |
|    total_timesteps      | 863232        |
| train/                  |               |
|    approx_kl            | 0.00057262566 |
|    clip_fraction        | 0.00713       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.16         |
|    explained_variance   | 0.00162       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00425       |
|    n_updates            | 8420          |
|    policy_gradient_loss | -0.000939     |
|    reward               | 0.0           |
|    value_loss           | 0.0199        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.718         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 844           |
|    time_elapsed         | 5145          |
|    total_timesteps      | 864256        |
| train/                  |               |
|    approx_kl            | 0.0007883165  |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.173        |
|    explained_variance   | 0.001         |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00266       |
|    n_updates            | 8430          |
|    policy_gradient_loss | -0.000961     |
|    reward               | 0.00042529858 |
|    value_loss           | 0.0237        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.721        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 845          |
|    time_elapsed         | 5150         |
|    total_timesteps      | 865280       |
| train/                  |              |
|    approx_kl            | 0.0006926244 |
|    clip_fraction        | 0.0115       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.153       |
|    explained_variance   | 0.00111      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00116      |
|    n_updates            | 8440         |
|    policy_gradient_loss | -0.00247     |
|    reward               | 0.0          |
|    value_loss           | 0.0226       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.718        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 846          |
|    time_elapsed         | 5156         |
|    total_timesteps      | 866304       |
| train/                  |              |
|    approx_kl            | 0.0007467636 |
|    clip_fraction        | 0.00928      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.14        |
|    explained_variance   | 0.0007       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0101       |
|    n_updates            | 8450         |
|    policy_gradient_loss | -0.000713    |
|    reward               | 0.0046547367 |
|    value_loss           | 0.0275       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.723         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 847           |
|    time_elapsed         | 5162          |
|    total_timesteps      | 867328        |
| train/                  |               |
|    approx_kl            | 0.00036459882 |
|    clip_fraction        | 0.00596       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.15         |
|    explained_variance   | 0.000358      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0102        |
|    n_updates            | 8460          |
|    policy_gradient_loss | -0.000216     |
|    reward               | 0.0040434576  |
|    value_loss           | 0.0258        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.723         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 848           |
|    time_elapsed         | 5168          |
|    total_timesteps      | 868352        |
| train/                  |               |
|    approx_kl            | 0.00071624137 |
|    clip_fraction        | 0.0218        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.185        |
|    explained_variance   | 0.000299      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00624       |
|    n_updates            | 8470          |
|    policy_gradient_loss | -0.00245      |
|    reward               | 0.0           |
|    value_loss           | 0.0308        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.719        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 849          |
|    time_elapsed         | 5174         |
|    total_timesteps      | 869376       |
| train/                  |              |
|    approx_kl            | 0.0003112327 |
|    clip_fraction        | 0.00986      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.224       |
|    explained_variance   | 0.000348     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0303       |
|    n_updates            | 8480         |
|    policy_gradient_loss | 0.000312     |
|    reward               | 0.010002488  |
|    value_loss           | 0.0297       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.715         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 850           |
|    time_elapsed         | 5180          |
|    total_timesteps      | 870400        |
| train/                  |               |
|    approx_kl            | 0.00088552455 |
|    clip_fraction        | 0.00801       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.23         |
|    explained_variance   | 0.000742      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00328       |
|    n_updates            | 8490          |
|    policy_gradient_loss | 0.000772      |
|    reward               | 0.01030797    |
|    value_loss           | 0.0282        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.721         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 851           |
|    time_elapsed         | 5186          |
|    total_timesteps      | 871424        |
| train/                  |               |
|    approx_kl            | 0.00089287007 |
|    clip_fraction        | 0.00625       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.226        |
|    explained_variance   | 0.00127       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0049        |
|    n_updates            | 8500          |
|    policy_gradient_loss | -0.000885     |
|    reward               | 0.007492461   |
|    value_loss           | 0.0218        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 852          |
|    time_elapsed         | 5191         |
|    total_timesteps      | 872448       |
| train/                  |              |
|    approx_kl            | 0.0010953554 |
|    clip_fraction        | 0.00947      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.193       |
|    explained_variance   | 0.000425     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00459      |
|    n_updates            | 8510         |
|    policy_gradient_loss | -0.00179     |
|    reward               | 0.001567729  |
|    value_loss           | 0.0254       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.726         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 853           |
|    time_elapsed         | 5197          |
|    total_timesteps      | 873472        |
| train/                  |               |
|    approx_kl            | 0.00077844434 |
|    clip_fraction        | 0.00967       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.192        |
|    explained_variance   | 0.000378      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.037         |
|    n_updates            | 8520          |
|    policy_gradient_loss | 8.18e-05      |
|    reward               | 0.0013438341  |
|    value_loss           | 0.0312        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 854          |
|    time_elapsed         | 5204         |
|    total_timesteps      | 874496       |
| train/                  |              |
|    approx_kl            | 0.0006598387 |
|    clip_fraction        | 0.00752      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.164       |
|    explained_variance   | 0.000194     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00653      |
|    n_updates            | 8530         |
|    policy_gradient_loss | -0.00169     |
|    reward               | 0.0          |
|    value_loss           | 0.0307       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.725         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 855           |
|    time_elapsed         | 5210          |
|    total_timesteps      | 875520        |
| train/                  |               |
|    approx_kl            | 0.00032230472 |
|    clip_fraction        | 0.00303       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.161        |
|    explained_variance   | 0.000674      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00228       |
|    n_updates            | 8540          |
|    policy_gradient_loss | 0.000125      |
|    reward               | 0.0           |
|    value_loss           | 0.0261        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.719         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 856           |
|    time_elapsed         | 5216          |
|    total_timesteps      | 876544        |
| train/                  |               |
|    approx_kl            | 0.00084764377 |
|    clip_fraction        | 0.00791       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.134        |
|    explained_variance   | 0.000696      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00571       |
|    n_updates            | 8550          |
|    policy_gradient_loss | -0.00109      |
|    reward               | -0.0050593945 |
|    value_loss           | 0.0317        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.719       |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 857         |
|    time_elapsed         | 5222        |
|    total_timesteps      | 877568      |
| train/                  |             |
|    approx_kl            | 0.000247485 |
|    clip_fraction        | 0.00742     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.144      |
|    explained_variance   | 0.000781    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0143      |
|    n_updates            | 8560        |
|    policy_gradient_loss | 0.000144    |
|    reward               | 0.06352112  |
|    value_loss           | 0.0307      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.712         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 858           |
|    time_elapsed         | 5228          |
|    total_timesteps      | 878592        |
| train/                  |               |
|    approx_kl            | 0.00019431423 |
|    clip_fraction        | 0.0115        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.17         |
|    explained_variance   | 0.000481      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00678       |
|    n_updates            | 8570          |
|    policy_gradient_loss | 0.000335      |
|    reward               | -0.010476743  |
|    value_loss           | 0.0333        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.717        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 859          |
|    time_elapsed         | 5234         |
|    total_timesteps      | 879616       |
| train/                  |              |
|    approx_kl            | 0.0005470526 |
|    clip_fraction        | 0.00166      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.155       |
|    explained_variance   | 0.00255      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00139      |
|    n_updates            | 8580         |
|    policy_gradient_loss | -0.00039     |
|    reward               | 0.029968714  |
|    value_loss           | 0.0215       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 860          |
|    time_elapsed         | 5240         |
|    total_timesteps      | 880640       |
| train/                  |              |
|    approx_kl            | 0.0006737809 |
|    clip_fraction        | 0.0108       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.16        |
|    explained_variance   | -0.00021     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00603      |
|    n_updates            | 8590         |
|    policy_gradient_loss | 0.000229     |
|    reward               | 0.05963641   |
|    value_loss           | 0.0299       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.727       |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 861         |
|    time_elapsed         | 5246        |
|    total_timesteps      | 881664      |
| train/                  |             |
|    approx_kl            | 0.000616807 |
|    clip_fraction        | 0.0126      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.186      |
|    explained_variance   | 0.00112     |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0037     |
|    n_updates            | 8600        |
|    policy_gradient_loss | 0.000271    |
|    reward               | 0.053542785 |
|    value_loss           | 0.0261      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.72          |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 862           |
|    time_elapsed         | 5251          |
|    total_timesteps      | 882688        |
| train/                  |               |
|    approx_kl            | 0.00084105297 |
|    clip_fraction        | 0.0109        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.165        |
|    explained_variance   | 0.000611      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0141       |
|    n_updates            | 8610          |
|    policy_gradient_loss | -0.00255      |
|    reward               | 0.0050890823  |
|    value_loss           | 0.0297        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.723        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 863          |
|    time_elapsed         | 5258         |
|    total_timesteps      | 883712       |
| train/                  |              |
|    approx_kl            | 0.000774774  |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.19        |
|    explained_variance   | 0.000956     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0033      |
|    n_updates            | 8620         |
|    policy_gradient_loss | 0.000378     |
|    reward               | -0.015749246 |
|    value_loss           | 0.0248       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.723         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 864           |
|    time_elapsed         | 5264          |
|    total_timesteps      | 884736        |
| train/                  |               |
|    approx_kl            | 0.00048668502 |
|    clip_fraction        | 0.00654       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.172        |
|    explained_variance   | 0.00023       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00428       |
|    n_updates            | 8630          |
|    policy_gradient_loss | -0.0012       |
|    reward               | 0.034782093   |
|    value_loss           | 0.0263        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.731       |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 865         |
|    time_elapsed         | 5271        |
|    total_timesteps      | 885760      |
| train/                  |             |
|    approx_kl            | 0.000737203 |
|    clip_fraction        | 0.0175      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.17       |
|    explained_variance   | 0.0012      |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00999     |
|    n_updates            | 8640        |
|    policy_gradient_loss | -0.00226    |
|    reward               | -0.06454307 |
|    value_loss           | 0.023       |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.74         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 866          |
|    time_elapsed         | 5279         |
|    total_timesteps      | 886784       |
| train/                  |              |
|    approx_kl            | 0.0007637738 |
|    clip_fraction        | 0.0182       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.169       |
|    explained_variance   | 0.00128      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00891      |
|    n_updates            | 8650         |
|    policy_gradient_loss | -0.0029      |
|    reward               | 0.024832552  |
|    value_loss           | 0.0236       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.738         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 867           |
|    time_elapsed         | 5286          |
|    total_timesteps      | 887808        |
| train/                  |               |
|    approx_kl            | 0.00078571745 |
|    clip_fraction        | 0.00449       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.176        |
|    explained_variance   | -8.55e-05     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0193        |
|    n_updates            | 8660          |
|    policy_gradient_loss | -6.83e-07     |
|    reward               | -0.28440273   |
|    value_loss           | 0.0231        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.744         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 869           |
|    time_elapsed         | 5301          |
|    total_timesteps      | 889856        |
| train/                  |               |
|    approx_kl            | 0.00083609426 |
|    clip_fraction        | 0.016         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.172        |
|    explained_variance   | 0.000498      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00953       |
|    n_updates            | 8680          |
|    policy_gradient_loss | 0.00067       |
|    reward               | 0.0071463212  |
|    value_loss           | 0.0329        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.74          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 870           |
|    time_elapsed         | 5307          |
|    total_timesteps      | 890880        |
| train/                  |               |
|    approx_kl            | 9.7524084e-05 |
|    clip_fraction        | 0.00645       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.154        |
|    explained_variance   | 0.000688      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0111        |
|    n_updates            | 8690          |
|    policy_gradient_loss | -0.00182      |
|    reward               | 0.0           |
|    value_loss           | 0.0253        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.742         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 871           |
|    time_elapsed         | 5315          |
|    total_timesteps      | 891904        |
| train/                  |               |
|    approx_kl            | 0.00055372575 |
|    clip_fraction        | 0.00947       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.179        |
|    explained_variance   | 0.000819      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0101        |
|    n_updates            | 8700          |
|    policy_gradient_loss | -0.000353     |
|    reward               | 0.0024782703  |
|    value_loss           | 0.0246        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.741         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 872           |
|    time_elapsed         | 5322          |
|    total_timesteps      | 892928        |
| train/                  |               |
|    approx_kl            | 0.00044378877 |
|    clip_fraction        | 0.00674       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.154        |
|    explained_variance   | 0.000229      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0174        |
|    n_updates            | 8710          |
|    policy_gradient_loss | -0.00144      |
|    reward               | -0.019429555  |
|    value_loss           | 0.0289        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.749         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 873           |
|    time_elapsed         | 5329          |
|    total_timesteps      | 893952        |
| train/                  |               |
|    approx_kl            | 0.00048389478 |
|    clip_fraction        | 0.0188        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.171        |
|    explained_variance   | 0.00021       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0195        |
|    n_updates            | 8720          |
|    policy_gradient_loss | -0.000899     |
|    reward               | -0.011221529  |
|    value_loss           | 0.0316        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.757         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 874           |
|    time_elapsed         | 5336          |
|    total_timesteps      | 894976        |
| train/                  |               |
|    approx_kl            | 0.00038696802 |
|    clip_fraction        | 0.0185        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.199        |
|    explained_variance   | 0.000562      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0121        |
|    n_updates            | 8730          |
|    policy_gradient_loss | 0.000227      |
|    reward               | 0.0067532845  |
|    value_loss           | 0.0292        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.763         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 875           |
|    time_elapsed         | 5343          |
|    total_timesteps      | 896000        |
| train/                  |               |
|    approx_kl            | 0.00039153855 |
|    clip_fraction        | 0.0123        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.229        |
|    explained_variance   | 3.56e-05      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.022         |
|    n_updates            | 8740          |
|    policy_gradient_loss | 0.000615      |
|    reward               | 0.0           |
|    value_loss           | 0.0445        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.754        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 876          |
|    time_elapsed         | 5352         |
|    total_timesteps      | 897024       |
| train/                  |              |
|    approx_kl            | 0.0005693181 |
|    clip_fraction        | 0.0225       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.205       |
|    explained_variance   | 0.00132      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00754      |
|    n_updates            | 8750         |
|    policy_gradient_loss | -0.00397     |
|    reward               | 0.009599197  |
|    value_loss           | 0.0204       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.76          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 877           |
|    time_elapsed         | 5360          |
|    total_timesteps      | 898048        |
| train/                  |               |
|    approx_kl            | 0.00020394154 |
|    clip_fraction        | 0.00381       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.184        |
|    explained_variance   | 0.000704      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00696       |
|    n_updates            | 8760          |
|    policy_gradient_loss | -0.000355     |
|    reward               | 0.0           |
|    value_loss           | 0.0255        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.769         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 878           |
|    time_elapsed         | 5367          |
|    total_timesteps      | 899072        |
| train/                  |               |
|    approx_kl            | 0.00051698316 |
|    clip_fraction        | 0.00967       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.202        |
|    explained_variance   | -0.000702     |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00756       |
|    n_updates            | 8770          |
|    policy_gradient_loss | 0.000573      |
|    reward               | 0.0           |
|    value_loss           | 0.0288        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.771         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 879           |
|    time_elapsed         | 5375          |
|    total_timesteps      | 900096        |
| train/                  |               |
|    approx_kl            | 0.00074970606 |
|    clip_fraction        | 0.018         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.244        |
|    explained_variance   | 0.000539      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0123        |
|    n_updates            | 8780          |
|    policy_gradient_loss | -0.000419     |
|    reward               | 0.0           |
|    value_loss           | 0.0248        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.769         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 880           |
|    time_elapsed         | 5383          |
|    total_timesteps      | 901120        |
| train/                  |               |
|    approx_kl            | 0.00032286596 |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.263        |
|    explained_variance   | 0.000755      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.0034       |
|    n_updates            | 8790          |
|    policy_gradient_loss | 0.000524      |
|    reward               | 0.028853953   |
|    value_loss           | 0.0196        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.765        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 881          |
|    time_elapsed         | 5389         |
|    total_timesteps      | 902144       |
| train/                  |              |
|    approx_kl            | 0.0002902734 |
|    clip_fraction        | 0.00898      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.295       |
|    explained_variance   | -3.58e-06    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00324     |
|    n_updates            | 8800         |
|    policy_gradient_loss | 0.0012       |
|    reward               | 0.0          |
|    value_loss           | 0.0183       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.757        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 882          |
|    time_elapsed         | 5396         |
|    total_timesteps      | 903168       |
| train/                  |              |
|    approx_kl            | 0.0010847768 |
|    clip_fraction        | 0.00596      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.261       |
|    explained_variance   | 0.000489     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0101       |
|    n_updates            | 8810         |
|    policy_gradient_loss | -0.0012      |
|    reward               | 0.004157809  |
|    value_loss           | 0.0178       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.754        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 883          |
|    time_elapsed         | 5404         |
|    total_timesteps      | 904192       |
| train/                  |              |
|    approx_kl            | 0.0012858484 |
|    clip_fraction        | 0.0196       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.245       |
|    explained_variance   | 0.000934     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.0015      |
|    n_updates            | 8820         |
|    policy_gradient_loss | -0.00238     |
|    reward               | 0.0          |
|    value_loss           | 0.0234       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.757         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 884           |
|    time_elapsed         | 5411          |
|    total_timesteps      | 905216        |
| train/                  |               |
|    approx_kl            | 3.7755468e-05 |
|    clip_fraction        | 0.00771       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.241        |
|    explained_variance   | 0.000353      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00386       |
|    n_updates            | 8830          |
|    policy_gradient_loss | 0.000675      |
|    reward               | 0.018280527   |
|    value_loss           | 0.0195        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.753         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 885           |
|    time_elapsed         | 5420          |
|    total_timesteps      | 906240        |
| train/                  |               |
|    approx_kl            | 0.00033014937 |
|    clip_fraction        | 0.0108        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.24         |
|    explained_variance   | 0.00126       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00678       |
|    n_updates            | 8840          |
|    policy_gradient_loss | -0.000451     |
|    reward               | 0.0           |
|    value_loss           | 0.0256        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 886          |
|    time_elapsed         | 5427         |
|    total_timesteps      | 907264       |
| train/                  |              |
|    approx_kl            | 0.0017244889 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.236       |
|    explained_variance   | 0.000261     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00506      |
|    n_updates            | 8850         |
|    policy_gradient_loss | -0.00252     |
|    reward               | 0.021994     |
|    value_loss           | 0.0215       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.756        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 887          |
|    time_elapsed         | 5434         |
|    total_timesteps      | 908288       |
| train/                  |              |
|    approx_kl            | 0.0010847307 |
|    clip_fraction        | 0.0177       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.247       |
|    explained_variance   | 0.000324     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0129       |
|    n_updates            | 8860         |
|    policy_gradient_loss | -0.00101     |
|    reward               | 0.024245193  |
|    value_loss           | 0.0247       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.755        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 888          |
|    time_elapsed         | 5441         |
|    total_timesteps      | 909312       |
| train/                  |              |
|    approx_kl            | 0.0012765158 |
|    clip_fraction        | 0.0184       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.205       |
|    explained_variance   | 0.000918     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00776      |
|    n_updates            | 8870         |
|    policy_gradient_loss | -0.00442     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0206       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.76          |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 889           |
|    time_elapsed         | 5448          |
|    total_timesteps      | 910336        |
| train/                  |               |
|    approx_kl            | 0.00067415764 |
|    clip_fraction        | 0.00791       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.181        |
|    explained_variance   | 0.000114      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00181       |
|    n_updates            | 8880          |
|    policy_gradient_loss | -0.0016       |
|    reward               | 0.006521842   |
|    value_loss           | 0.0266        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.764         |
| time/                   |               |
|    fps                  | 167           |
|    iterations           | 890           |
|    time_elapsed         | 5455          |
|    total_timesteps      | 911360        |
| train/                  |               |
|    approx_kl            | 0.00034428277 |
|    clip_fraction        | 0.0085        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.178        |
|    explained_variance   | 0.000615      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00772       |
|    n_updates            | 8890          |
|    policy_gradient_loss | -0.000537     |
|    reward               | 0.0           |
|    value_loss           | 0.0302        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.761        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 891          |
|    time_elapsed         | 5463         |
|    total_timesteps      | 912384       |
| train/                  |              |
|    approx_kl            | 0.0005546988 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.19        |
|    explained_variance   | 0.000901     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0117       |
|    n_updates            | 8900         |
|    policy_gradient_loss | -0.000178    |
|    reward               | 0.0          |
|    value_loss           | 0.0263       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.752         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 892           |
|    time_elapsed         | 5471          |
|    total_timesteps      | 913408        |
| train/                  |               |
|    approx_kl            | 0.00072857656 |
|    clip_fraction        | 0.00566       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.182        |
|    explained_variance   | 0.0012        |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00242       |
|    n_updates            | 8910          |
|    policy_gradient_loss | -0.000647     |
|    reward               | 0.0           |
|    value_loss           | 0.0259        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.744        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 893          |
|    time_elapsed         | 5478         |
|    total_timesteps      | 914432       |
| train/                  |              |
|    approx_kl            | 0.0005573394 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.211       |
|    explained_variance   | 8.09e-05     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0256       |
|    n_updates            | 8920         |
|    policy_gradient_loss | 0.000168     |
|    reward               | 0.0          |
|    value_loss           | 0.044        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.745        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 894          |
|    time_elapsed         | 5485         |
|    total_timesteps      | 915456       |
| train/                  |              |
|    approx_kl            | 0.000705806  |
|    clip_fraction        | 0.00898      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.196       |
|    explained_variance   | 0.000556     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0073       |
|    n_updates            | 8930         |
|    policy_gradient_loss | -0.000937    |
|    reward               | -0.021269789 |
|    value_loss           | 0.0333       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.746         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 895           |
|    time_elapsed         | 5493          |
|    total_timesteps      | 916480        |
| train/                  |               |
|    approx_kl            | 0.00042886916 |
|    clip_fraction        | 0.00684       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.192        |
|    explained_variance   | 0.000306      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0136        |
|    n_updates            | 8940          |
|    policy_gradient_loss | -0.000219     |
|    reward               | 0.010258954   |
|    value_loss           | 0.0239        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.749        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 896          |
|    time_elapsed         | 5501         |
|    total_timesteps      | 917504       |
| train/                  |              |
|    approx_kl            | 0.0006822172 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.000512     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00117     |
|    n_updates            | 8950         |
|    policy_gradient_loss | 2.74e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.0247       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.741         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 897           |
|    time_elapsed         | 5508          |
|    total_timesteps      | 918528        |
| train/                  |               |
|    approx_kl            | 0.00083413534 |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.187        |
|    explained_variance   | 0.000431      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00676      |
|    n_updates            | 8960          |
|    policy_gradient_loss | -0.00236      |
|    reward               | 0.0           |
|    value_loss           | 0.0238        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.742         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 898           |
|    time_elapsed         | 5515          |
|    total_timesteps      | 919552        |
| train/                  |               |
|    approx_kl            | 0.0005416854  |
|    clip_fraction        | 0.011         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.188        |
|    explained_variance   | 0.000903      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00221       |
|    n_updates            | 8970          |
|    policy_gradient_loss | -0.000961     |
|    reward               | -0.0061185523 |
|    value_loss           | 0.0198        |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.727       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 899         |
|    time_elapsed         | 5522        |
|    total_timesteps      | 920576      |
| train/                  |             |
|    approx_kl            | 0.000863183 |
|    clip_fraction        | 0.0155      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.159      |
|    explained_variance   | 0.000228    |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0118      |
|    n_updates            | 8980        |
|    policy_gradient_loss | -0.0037     |
|    reward               | 0.0         |
|    value_loss           | 0.0284      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.711        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 900          |
|    time_elapsed         | 5529         |
|    total_timesteps      | 921600       |
| train/                  |              |
|    approx_kl            | 0.0028484182 |
|    clip_fraction        | 0.00703      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.182       |
|    explained_variance   | 0.000603     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0122       |
|    n_updates            | 8990         |
|    policy_gradient_loss | -0.000771    |
|    reward               | 0.06307679   |
|    value_loss           | 0.0285       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 901           |
|    time_elapsed         | 5536          |
|    total_timesteps      | 922624        |
| train/                  |               |
|    approx_kl            | 0.00062501634 |
|    clip_fraction        | 0.0154        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.195        |
|    explained_variance   | 0.00135       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0022        |
|    n_updates            | 9000          |
|    policy_gradient_loss | -0.00119      |
|    reward               | 0.015076309   |
|    value_loss           | 0.0223        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.72          |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 902           |
|    time_elapsed         | 5543          |
|    total_timesteps      | 923648        |
| train/                  |               |
|    approx_kl            | 0.00058347126 |
|    clip_fraction        | 0.00947       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.198        |
|    explained_variance   | 0.000774      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00811       |
|    n_updates            | 9010          |
|    policy_gradient_loss | -0.000649     |
|    reward               | 0.0           |
|    value_loss           | 0.0257        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.725        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 903          |
|    time_elapsed         | 5549         |
|    total_timesteps      | 924672       |
| train/                  |              |
|    approx_kl            | 0.0007061083 |
|    clip_fraction        | 0.011        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.172       |
|    explained_variance   | 0.000713     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00653      |
|    n_updates            | 9020         |
|    policy_gradient_loss | -0.00215     |
|    reward               | 0.017738882  |
|    value_loss           | 0.0293       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.731        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 904          |
|    time_elapsed         | 5557         |
|    total_timesteps      | 925696       |
| train/                  |              |
|    approx_kl            | 0.0002032473 |
|    clip_fraction        | 0.0181       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.191       |
|    explained_variance   | 0.000895     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00217     |
|    n_updates            | 9030         |
|    policy_gradient_loss | -0.000935    |
|    reward               | 0.009156582  |
|    value_loss           | 0.0188       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.719        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 906          |
|    time_elapsed         | 5571         |
|    total_timesteps      | 927744       |
| train/                  |              |
|    approx_kl            | 0.0012418858 |
|    clip_fraction        | 0.0152       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.186       |
|    explained_variance   | 0.000921     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00571      |
|    n_updates            | 9050         |
|    policy_gradient_loss | -0.00315     |
|    reward               | 0.012884646  |
|    value_loss           | 0.0242       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.724         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 908           |
|    time_elapsed         | 5586          |
|    total_timesteps      | 929792        |
| train/                  |               |
|    approx_kl            | 2.5557238e-05 |
|    clip_fraction        | 0.00244       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.216        |
|    explained_variance   | 0.000888      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00287       |
|    n_updates            | 9070          |
|    policy_gradient_loss | 0.00105       |
|    reward               | 0.0095247775  |
|    value_loss           | 0.0237        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.723        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 909          |
|    time_elapsed         | 5593         |
|    total_timesteps      | 930816       |
| train/                  |              |
|    approx_kl            | 0.0010324537 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.19        |
|    explained_variance   | 0.000806     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00166      |
|    n_updates            | 9080         |
|    policy_gradient_loss | -0.00165     |
|    reward               | 0.010572486  |
|    value_loss           | 0.0278       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.734         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 910           |
|    time_elapsed         | 5601          |
|    total_timesteps      | 931840        |
| train/                  |               |
|    approx_kl            | 0.0004482849  |
|    clip_fraction        | 0.00518       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.171        |
|    explained_variance   | 7.7e-05       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0118        |
|    n_updates            | 9090          |
|    policy_gradient_loss | -0.00109      |
|    reward               | -0.0038618757 |
|    value_loss           | 0.028         |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.737         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 911           |
|    time_elapsed         | 5608          |
|    total_timesteps      | 932864        |
| train/                  |               |
|    approx_kl            | 0.00041934976 |
|    clip_fraction        | 0.0165        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.198        |
|    explained_variance   | 0.00152       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00175       |
|    n_updates            | 9100          |
|    policy_gradient_loss | -1.07e-05     |
|    reward               | 0.022487335   |
|    value_loss           | 0.026         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 912          |
|    time_elapsed         | 5616         |
|    total_timesteps      | 933888       |
| train/                  |              |
|    approx_kl            | 0.0006767608 |
|    clip_fraction        | 0.0104       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.193       |
|    explained_variance   | 0.000872     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00292     |
|    n_updates            | 9110         |
|    policy_gradient_loss | -0.000735    |
|    reward               | 0.0718312    |
|    value_loss           | 0.0218       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.732         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 913           |
|    time_elapsed         | 5622          |
|    total_timesteps      | 934912        |
| train/                  |               |
|    approx_kl            | 0.00061509287 |
|    clip_fraction        | 0.00869       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.206        |
|    explained_variance   | 0.000221      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00781       |
|    n_updates            | 9120          |
|    policy_gradient_loss | 4.93e-05      |
|    reward               | 0.027396604   |
|    value_loss           | 0.0238        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.729         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 914           |
|    time_elapsed         | 5629          |
|    total_timesteps      | 935936        |
| train/                  |               |
|    approx_kl            | 0.00024211215 |
|    clip_fraction        | 0.000781      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.19         |
|    explained_variance   | 0.000679      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00737       |
|    n_updates            | 9130          |
|    policy_gradient_loss | -0.000279     |
|    reward               | -0.00020002   |
|    value_loss           | 0.0241        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.723         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 915           |
|    time_elapsed         | 5636          |
|    total_timesteps      | 936960        |
| train/                  |               |
|    approx_kl            | 0.00028776046 |
|    clip_fraction        | 0.00811       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.216        |
|    explained_variance   | 0.000177      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00362       |
|    n_updates            | 9140          |
|    policy_gradient_loss | 0.000447      |
|    reward               | 0.005939674   |
|    value_loss           | 0.0307        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.722         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 916           |
|    time_elapsed         | 5643          |
|    total_timesteps      | 937984        |
| train/                  |               |
|    approx_kl            | 0.00048534048 |
|    clip_fraction        | 0.013         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.21         |
|    explained_variance   | 0.000414      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00624       |
|    n_updates            | 9150          |
|    policy_gradient_loss | -0.00121      |
|    reward               | 0.026778746   |
|    value_loss           | 0.0225        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.723        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 917          |
|    time_elapsed         | 5651         |
|    total_timesteps      | 939008       |
| train/                  |              |
|    approx_kl            | 0.0010890944 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.00102      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00343      |
|    n_updates            | 9160         |
|    policy_gradient_loss | -0.00136     |
|    reward               | 0.059511     |
|    value_loss           | 0.0274       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.721       |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 918         |
|    time_elapsed         | 5659        |
|    total_timesteps      | 940032      |
| train/                  |             |
|    approx_kl            | 0.001780798 |
|    clip_fraction        | 0.0206      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.243      |
|    explained_variance   | 0.00184     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.0193      |
|    n_updates            | 9170        |
|    policy_gradient_loss | 5.57e-05    |
|    reward               | 0.0         |
|    value_loss           | 0.025       |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.733        |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 919          |
|    time_elapsed         | 5666         |
|    total_timesteps      | 941056       |
| train/                  |              |
|    approx_kl            | 0.0010748187 |
|    clip_fraction        | 0.0042       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.233       |
|    explained_variance   | 0.000751     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00428     |
|    n_updates            | 9180         |
|    policy_gradient_loss | -0.00104     |
|    reward               | 0.0          |
|    value_loss           | 0.0184       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.733         |
| time/                   |               |
|    fps                  | 166           |
|    iterations           | 920           |
|    time_elapsed         | 5675          |
|    total_timesteps      | 942080        |
| train/                  |               |
|    approx_kl            | 0.00089749316 |
|    clip_fraction        | 0.024         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.229        |
|    explained_variance   | 0.00118       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00308       |
|    n_updates            | 9190          |
|    policy_gradient_loss | -0.00203      |
|    reward               | 0.0016535837  |
|    value_loss           | 0.0216        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.732        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 921          |
|    time_elapsed         | 5682         |
|    total_timesteps      | 943104       |
| train/                  |              |
|    approx_kl            | 0.0005359854 |
|    clip_fraction        | 0.0107       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.2         |
|    explained_variance   | 0.00128      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00682      |
|    n_updates            | 9200         |
|    policy_gradient_loss | -0.00135     |
|    reward               | 0.08950756   |
|    value_loss           | 0.0262       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.735        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 922          |
|    time_elapsed         | 5688         |
|    total_timesteps      | 944128       |
| train/                  |              |
|    approx_kl            | 0.0002508607 |
|    clip_fraction        | 0.0147       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | 0.00145      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00928      |
|    n_updates            | 9210         |
|    policy_gradient_loss | 7.85e-05     |
|    reward               | -0.005652719 |
|    value_loss           | 0.0368       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.73          |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 923           |
|    time_elapsed         | 5695          |
|    total_timesteps      | 945152        |
| train/                  |               |
|    approx_kl            | 0.00063031045 |
|    clip_fraction        | 0.0161        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.259        |
|    explained_variance   | 0.000212      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.000116     |
|    n_updates            | 9220          |
|    policy_gradient_loss | 0.000335      |
|    reward               | -0.015949512  |
|    value_loss           | 0.0347        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.726         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 924           |
|    time_elapsed         | 5703          |
|    total_timesteps      | 946176        |
| train/                  |               |
|    approx_kl            | 0.00035298953 |
|    clip_fraction        | 0.0101        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.276        |
|    explained_variance   | 0.00131       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00859       |
|    n_updates            | 9230          |
|    policy_gradient_loss | 0.000664      |
|    reward               | -0.017253267  |
|    value_loss           | 0.03          |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 925          |
|    time_elapsed         | 5710         |
|    total_timesteps      | 947200       |
| train/                  |              |
|    approx_kl            | 0.0007842491 |
|    clip_fraction        | 0.0083       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.297       |
|    explained_variance   | -0.000521    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00653      |
|    n_updates            | 9240         |
|    policy_gradient_loss | 0.000807     |
|    reward               | 0.035023358  |
|    value_loss           | 0.0247       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 926          |
|    time_elapsed         | 5717         |
|    total_timesteps      | 948224       |
| train/                  |              |
|    approx_kl            | 0.0006263304 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.304       |
|    explained_variance   | 0.000778     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.000639     |
|    n_updates            | 9250         |
|    policy_gradient_loss | 0.000279     |
|    reward               | 0.004551238  |
|    value_loss           | 0.0189       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.73         |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 927          |
|    time_elapsed         | 5724         |
|    total_timesteps      | 949248       |
| train/                  |              |
|    approx_kl            | 0.0019098108 |
|    clip_fraction        | 0.0254       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.274       |
|    explained_variance   | 0.000728     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00527      |
|    n_updates            | 9260         |
|    policy_gradient_loss | -0.00331     |
|    reward               | 0.006453864  |
|    value_loss           | 0.0243       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.737        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 928          |
|    time_elapsed         | 5731         |
|    total_timesteps      | 950272       |
| train/                  |              |
|    approx_kl            | 0.0006371885 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.301       |
|    explained_variance   | 0.00103      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0124       |
|    n_updates            | 9270         |
|    policy_gradient_loss | 0.000376     |
|    reward               | 0.04851502   |
|    value_loss           | 0.0244       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.749        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 929          |
|    time_elapsed         | 5738         |
|    total_timesteps      | 951296       |
| train/                  |              |
|    approx_kl            | 0.0007513228 |
|    clip_fraction        | 0.000391     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.297       |
|    explained_variance   | 0.000745     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00643      |
|    n_updates            | 9280         |
|    policy_gradient_loss | 0.000196     |
|    reward               | 0.032368172  |
|    value_loss           | 0.0253       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.75         |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 930          |
|    time_elapsed         | 5745         |
|    total_timesteps      | 952320       |
| train/                  |              |
|    approx_kl            | 0.0021100654 |
|    clip_fraction        | 0.0264       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.288       |
|    explained_variance   | 0.00162      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00592      |
|    n_updates            | 9290         |
|    policy_gradient_loss | -0.00455     |
|    reward               | 0.026017964  |
|    value_loss           | 0.0133       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.745         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 931           |
|    time_elapsed         | 5753          |
|    total_timesteps      | 953344        |
| train/                  |               |
|    approx_kl            | 0.00027077948 |
|    clip_fraction        | 0.0131        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.293        |
|    explained_variance   | 0.00151       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00474       |
|    n_updates            | 9300          |
|    policy_gradient_loss | -8.02e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.0192        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 932          |
|    time_elapsed         | 5760         |
|    total_timesteps      | 954368       |
| train/                  |              |
|    approx_kl            | 0.0012787407 |
|    clip_fraction        | 0.00723      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | -8.89e-05    |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00971      |
|    n_updates            | 9310         |
|    policy_gradient_loss | -0.00104     |
|    reward               | 0.009660383  |
|    value_loss           | 0.0192       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.744        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 933          |
|    time_elapsed         | 5768         |
|    total_timesteps      | 955392       |
| train/                  |              |
|    approx_kl            | 0.0010262944 |
|    clip_fraction        | 0.0104       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.219       |
|    explained_variance   | 0.000454     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00171     |
|    n_updates            | 9320         |
|    policy_gradient_loss | -0.00179     |
|    reward               | 0.0          |
|    value_loss           | 0.0191       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.743        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 934          |
|    time_elapsed         | 5775         |
|    total_timesteps      | 956416       |
| train/                  |              |
|    approx_kl            | 0.0006216475 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.185       |
|    explained_variance   | 0.00126      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00429     |
|    n_updates            | 9330         |
|    policy_gradient_loss | -0.00285     |
|    reward               | 0.0          |
|    value_loss           | 0.0166       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 935          |
|    time_elapsed         | 5783         |
|    total_timesteps      | 957440       |
| train/                  |              |
|    approx_kl            | 0.0005700239 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.226       |
|    explained_variance   | 0.000668     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00257      |
|    n_updates            | 9340         |
|    policy_gradient_loss | -0.00021     |
|    reward               | -0.05346313  |
|    value_loss           | 0.0324       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.744        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 936          |
|    time_elapsed         | 5790         |
|    total_timesteps      | 958464       |
| train/                  |              |
|    approx_kl            | 0.0010474873 |
|    clip_fraction        | 0.0163       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.255       |
|    explained_variance   | 0.00216      |
|    learning_rate        | 0.0002       |
|    loss                 | -3.39e-06    |
|    n_updates            | 9350         |
|    policy_gradient_loss | 0.000216     |
|    reward               | 0.025922423  |
|    value_loss           | 0.0303       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.746        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 937          |
|    time_elapsed         | 5798         |
|    total_timesteps      | 959488       |
| train/                  |              |
|    approx_kl            | 0.0011577704 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.278       |
|    explained_variance   | 0.000558     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0359       |
|    n_updates            | 9360         |
|    policy_gradient_loss | 0.000989     |
|    reward               | 0.0          |
|    value_loss           | 0.0303       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.746         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 938           |
|    time_elapsed         | 5805          |
|    total_timesteps      | 960512        |
| train/                  |               |
|    approx_kl            | 0.00090573897 |
|    clip_fraction        | 0.0238        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.277        |
|    explained_variance   | 0.00157       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00224      |
|    n_updates            | 9370          |
|    policy_gradient_loss | -0.00303      |
|    reward               | -0.14398299   |
|    value_loss           | 0.02          |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.754        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 939          |
|    time_elapsed         | 5811         |
|    total_timesteps      | 961536       |
| train/                  |              |
|    approx_kl            | 0.0014928761 |
|    clip_fraction        | 0.018        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.333       |
|    explained_variance   | 0.00137      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00374      |
|    n_updates            | 9380         |
|    policy_gradient_loss | 0.000486     |
|    reward               | 0.026137177  |
|    value_loss           | 0.0237       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.753        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 940          |
|    time_elapsed         | 5818         |
|    total_timesteps      | 962560       |
| train/                  |              |
|    approx_kl            | 0.0015719683 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.294       |
|    explained_variance   | 0.00101      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00589      |
|    n_updates            | 9390         |
|    policy_gradient_loss | -0.00221     |
|    reward               | 0.0          |
|    value_loss           | 0.0213       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.751         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 941           |
|    time_elapsed         | 5826          |
|    total_timesteps      | 963584        |
| train/                  |               |
|    approx_kl            | 0.00019377389 |
|    clip_fraction        | 0.00264       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.311        |
|    explained_variance   | 0.000612      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00241       |
|    n_updates            | 9400          |
|    policy_gradient_loss | 0.00076       |
|    reward               | 0.004950868   |
|    value_loss           | 0.0206        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.756        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 942          |
|    time_elapsed         | 5833         |
|    total_timesteps      | 964608       |
| train/                  |              |
|    approx_kl            | 0.0010695925 |
|    clip_fraction        | 0.00947      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.34        |
|    explained_variance   | 0.000914     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00676      |
|    n_updates            | 9410         |
|    policy_gradient_loss | 0.000861     |
|    reward               | 0.049159285  |
|    value_loss           | 0.0186       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.757        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 943          |
|    time_elapsed         | 5841         |
|    total_timesteps      | 965632       |
| train/                  |              |
|    approx_kl            | 0.0018133319 |
|    clip_fraction        | 0.029        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.325       |
|    explained_variance   | 0.00292      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00318      |
|    n_updates            | 9420         |
|    policy_gradient_loss | -0.00462     |
|    reward               | 0.0          |
|    value_loss           | 0.0173       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.761        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 944          |
|    time_elapsed         | 5848         |
|    total_timesteps      | 966656       |
| train/                  |              |
|    approx_kl            | 0.0011894433 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.321       |
|    explained_variance   | 0.000683     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.000842    |
|    n_updates            | 9430         |
|    policy_gradient_loss | -0.00133     |
|    reward               | 0.0          |
|    value_loss           | 0.0185       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.761       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 945         |
|    time_elapsed         | 5856        |
|    total_timesteps      | 967680      |
| train/                  |             |
|    approx_kl            | 0.002337277 |
|    clip_fraction        | 0.0296      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.318      |
|    explained_variance   | 0.000399    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00484    |
|    n_updates            | 9440        |
|    policy_gradient_loss | -0.00272    |
|    reward               | 0.0         |
|    value_loss           | 0.023       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.753       |
| time/                   |             |
|    fps                  | 165         |
|    iterations           | 946         |
|    time_elapsed         | 5862        |
|    total_timesteps      | 968704      |
| train/                  |             |
|    approx_kl            | 0.001714057 |
|    clip_fraction        | 0.00879     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.295      |
|    explained_variance   | 0.00148     |
|    learning_rate        | 0.0002      |
|    loss                 | -0.0077     |
|    n_updates            | 9450        |
|    policy_gradient_loss | -0.00108    |
|    reward               | 0.023739088 |
|    value_loss           | 0.0177      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.751        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 947          |
|    time_elapsed         | 5869         |
|    total_timesteps      | 969728       |
| train/                  |              |
|    approx_kl            | 0.0007946095 |
|    clip_fraction        | 0.00635      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.253       |
|    explained_variance   | 0.00148      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00142     |
|    n_updates            | 9460         |
|    policy_gradient_loss | -0.00137     |
|    reward               | -0.00020002  |
|    value_loss           | 0.0157       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.757         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 948           |
|    time_elapsed         | 5876          |
|    total_timesteps      | 970752        |
| train/                  |               |
|    approx_kl            | 0.00061474706 |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.247        |
|    explained_variance   | 0.0034        |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00336       |
|    n_updates            | 9470          |
|    policy_gradient_loss | -0.000633     |
|    reward               | 0.0           |
|    value_loss           | 0.0225        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 949          |
|    time_elapsed         | 5884         |
|    total_timesteps      | 971776       |
| train/                  |              |
|    approx_kl            | 0.0012387587 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.0034       |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00479      |
|    n_updates            | 9480         |
|    policy_gradient_loss | -0.00259     |
|    reward               | 0.002496653  |
|    value_loss           | 0.0223       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.775         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 950           |
|    time_elapsed         | 5891          |
|    total_timesteps      | 972800        |
| train/                  |               |
|    approx_kl            | 0.00066726014 |
|    clip_fraction        | 0.0111        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.229        |
|    explained_variance   | 0.000506      |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00266      |
|    n_updates            | 9490          |
|    policy_gradient_loss | 0.000365      |
|    reward               | 0.025062986   |
|    value_loss           | 0.0238        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.773         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 951           |
|    time_elapsed         | 5898          |
|    total_timesteps      | 973824        |
| train/                  |               |
|    approx_kl            | 0.00034690346 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.218        |
|    explained_variance   | 0.00132       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0234        |
|    n_updates            | 9500          |
|    policy_gradient_loss | -0.000405     |
|    reward               | -0.0027290592 |
|    value_loss           | 0.0344        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.78         |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 952          |
|    time_elapsed         | 5905         |
|    total_timesteps      | 974848       |
| train/                  |              |
|    approx_kl            | 0.0011186743 |
|    clip_fraction        | 0.0274       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | 0.000166     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00105     |
|    n_updates            | 9510         |
|    policy_gradient_loss | -0.00215     |
|    reward               | 0.0          |
|    value_loss           | 0.0262       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.795         |
| time/                   |               |
|    fps                  | 165           |
|    iterations           | 953           |
|    time_elapsed         | 5912          |
|    total_timesteps      | 975872        |
| train/                  |               |
|    approx_kl            | 0.00068870425 |
|    clip_fraction        | 0.0112        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.199        |
|    explained_variance   | 0.000632      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00096       |
|    n_updates            | 9520          |
|    policy_gradient_loss | -0.00146      |
|    reward               | 0.027873186   |
|    value_loss           | 0.0299        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.801        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 954          |
|    time_elapsed         | 5919         |
|    total_timesteps      | 976896       |
| train/                  |              |
|    approx_kl            | 0.0018764642 |
|    clip_fraction        | 0.0292       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.225       |
|    explained_variance   | 0.00123      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00113      |
|    n_updates            | 9530         |
|    policy_gradient_loss | -0.00184     |
|    reward               | 0.079419926  |
|    value_loss           | 0.0265       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.801        |
| time/                   |              |
|    fps                  | 165          |
|    iterations           | 955          |
|    time_elapsed         | 5926         |
|    total_timesteps      | 977920       |
| train/                  |              |
|    approx_kl            | 0.0008630813 |
|    clip_fraction        | 0.0084       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.242       |
|    explained_variance   | 0.000828     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00198      |
|    n_updates            | 9540         |
|    policy_gradient_loss | -0.000987    |
|    reward               | 0.0          |
|    value_loss           | 0.0243       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.8           |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 956           |
|    time_elapsed         | 5933          |
|    total_timesteps      | 978944        |
| train/                  |               |
|    approx_kl            | 0.00068028836 |
|    clip_fraction        | 0.00381       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.243        |
|    explained_variance   | 0.00123       |
|    learning_rate        | 0.0002        |
|    loss                 | -0.00228      |
|    n_updates            | 9550          |
|    policy_gradient_loss | 0.000471      |
|    reward               | 0.0           |
|    value_loss           | 0.0246        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.8          |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 957          |
|    time_elapsed         | 5939         |
|    total_timesteps      | 979968       |
| train/                  |              |
|    approx_kl            | 0.0010561098 |
|    clip_fraction        | 0.0083       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | 0.00102      |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00963     |
|    n_updates            | 9560         |
|    policy_gradient_loss | -0.000475    |
|    reward               | 0.0          |
|    value_loss           | 0.0261       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.798        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 958          |
|    time_elapsed         | 5947         |
|    total_timesteps      | 980992       |
| train/                  |              |
|    approx_kl            | 0.0014614314 |
|    clip_fraction        | 0.0238       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.243       |
|    explained_variance   | 0.000591     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0191       |
|    n_updates            | 9570         |
|    policy_gradient_loss | -0.00182     |
|    reward               | 0.012200634  |
|    value_loss           | 0.0227       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.795         |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 959           |
|    time_elapsed         | 5954          |
|    total_timesteps      | 982016        |
| train/                  |               |
|    approx_kl            | 0.00039091968 |
|    clip_fraction        | 0.0188        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.218        |
|    explained_variance   | 0.00105       |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00178       |
|    n_updates            | 9580          |
|    policy_gradient_loss | -0.00359      |
|    reward               | 0.016129317   |
|    value_loss           | 0.0202        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.8          |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 960          |
|    time_elapsed         | 5961         |
|    total_timesteps      | 983040       |
| train/                  |              |
|    approx_kl            | 0.0007720735 |
|    clip_fraction        | 0.00557      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.209       |
|    explained_variance   | 0.000985     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00345      |
|    n_updates            | 9590         |
|    policy_gradient_loss | -0.000758    |
|    reward               | 0.0          |
|    value_loss           | 0.0232       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 961          |
|    time_elapsed         | 5968         |
|    total_timesteps      | 984064       |
| train/                  |              |
|    approx_kl            | 0.0007279671 |
|    clip_fraction        | 0.00752      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.223       |
|    explained_variance   | 0.000667     |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00381     |
|    n_updates            | 9600         |
|    policy_gradient_loss | 0.000303     |
|    reward               | 0.0          |
|    value_loss           | 0.0264       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.8          |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 962          |
|    time_elapsed         | 5976         |
|    total_timesteps      | 985088       |
| train/                  |              |
|    approx_kl            | 0.0009663281 |
|    clip_fraction        | 0.00742      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.197       |
|    explained_variance   | 0.000664     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00411      |
|    n_updates            | 9610         |
|    policy_gradient_loss | -0.0013      |
|    reward               | 0.0          |
|    value_loss           | 0.0243       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 963          |
|    time_elapsed         | 5984         |
|    total_timesteps      | 986112       |
| train/                  |              |
|    approx_kl            | 0.0010892223 |
|    clip_fraction        | 0.0263       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.205       |
|    explained_variance   | -0.000179    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00225     |
|    n_updates            | 9620         |
|    policy_gradient_loss | -0.00289     |
|    reward               | 0.0          |
|    value_loss           | 0.0195       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.8           |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 964           |
|    time_elapsed         | 5991          |
|    total_timesteps      | 987136        |
| train/                  |               |
|    approx_kl            | 0.00060806674 |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.2          |
|    explained_variance   | 0.000376      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00676       |
|    n_updates            | 9630          |
|    policy_gradient_loss | -0.000432     |
|    reward               | 0.0015917798  |
|    value_loss           | 0.0275        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.799        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 965          |
|    time_elapsed         | 5998         |
|    total_timesteps      | 988160       |
| train/                  |              |
|    approx_kl            | 0.0006780282 |
|    clip_fraction        | 0.00967      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.182       |
|    explained_variance   | 0.000856     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0211       |
|    n_updates            | 9640         |
|    policy_gradient_loss | -0.00146     |
|    reward               | 0.02081878   |
|    value_loss           | 0.0256       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.813        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 966          |
|    time_elapsed         | 6006         |
|    total_timesteps      | 989184       |
| train/                  |              |
|    approx_kl            | 0.0005891686 |
|    clip_fraction        | 0.00996      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.223       |
|    explained_variance   | 0.00105      |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0415       |
|    n_updates            | 9650         |
|    policy_gradient_loss | 0.00093      |
|    reward               | 0.0          |
|    value_loss           | 0.0336       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.809        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 967          |
|    time_elapsed         | 6012         |
|    total_timesteps      | 990208       |
| train/                  |              |
|    approx_kl            | 0.0024521442 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.239       |
|    explained_variance   | -0.000133    |
|    learning_rate        | 0.0002       |
|    loss                 | -0.00391     |
|    n_updates            | 9660         |
|    policy_gradient_loss | -0.00252     |
|    reward               | -0.013488257 |
|    value_loss           | 0.0188       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.815        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 968          |
|    time_elapsed         | 6019         |
|    total_timesteps      | 991232       |
| train/                  |              |
|    approx_kl            | 0.0013101744 |
|    clip_fraction        | 0.0176       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.303       |
|    explained_variance   | 0.000655     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00349      |
|    n_updates            | 9670         |
|    policy_gradient_loss | 0.000767     |
|    reward               | 0.084757276  |
|    value_loss           | 0.0208       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.821       |
| time/                   |             |
|    fps                  | 164         |
|    iterations           | 969         |
|    time_elapsed         | 6026        |
|    total_timesteps      | 992256      |
| train/                  |             |
|    approx_kl            | 0.000830936 |
|    clip_fraction        | 0.00508     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.284      |
|    explained_variance   | 0.00139     |
|    learning_rate        | 0.0002      |
|    loss                 | 0.00445     |
|    n_updates            | 9680        |
|    policy_gradient_loss | -0.000605   |
|    reward               | 0.02267357  |
|    value_loss           | 0.0164      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.819        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 970          |
|    time_elapsed         | 6034         |
|    total_timesteps      | 993280       |
| train/                  |              |
|    approx_kl            | 0.0011699093 |
|    clip_fraction        | 0.0345       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.321       |
|    explained_variance   | 0.000411     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.0165       |
|    n_updates            | 9690         |
|    policy_gradient_loss | -0.00108     |
|    reward               | 0.12000408   |
|    value_loss           | 0.0241       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.81         |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 971          |
|    time_elapsed         | 6041         |
|    total_timesteps      | 994304       |
| train/                  |              |
|    approx_kl            | 0.0011632633 |
|    clip_fraction        | 0.0228       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.339       |
|    explained_variance   | 0.000515     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00502      |
|    n_updates            | 9700         |
|    policy_gradient_loss | -0.00152     |
|    reward               | 0.063205965  |
|    value_loss           | 0.0222       |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 0.803      |
| time/                   |            |
|    fps                  | 164        |
|    iterations           | 972        |
|    time_elapsed         | 6048       |
|    total_timesteps      | 995328     |
| train/                  |            |
|    approx_kl            | 0.00136213 |
|    clip_fraction        | 0.0216     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.295     |
|    explained_variance   | 0.00107    |
|    learning_rate        | 0.0002     |
|    loss                 | -0.00634   |
|    n_updates            | 9710       |
|    policy_gradient_loss | -0.00364   |
|    reward               | 0.0        |
|    value_loss           | 0.0188     |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 0.803       |
| time/                   |             |
|    fps                  | 164         |
|    iterations           | 973         |
|    time_elapsed         | 6055        |
|    total_timesteps      | 996352      |
| train/                  |             |
|    approx_kl            | 0.001420202 |
|    clip_fraction        | 0.00918     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.258      |
|    explained_variance   | 0.000579    |
|    learning_rate        | 0.0002      |
|    loss                 | -0.00385    |
|    n_updates            | 9720        |
|    policy_gradient_loss | -0.00138    |
|    reward               | 0.009355218 |
|    value_loss           | 0.0205      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.814         |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 974           |
|    time_elapsed         | 6062          |
|    total_timesteps      | 997376        |
| train/                  |               |
|    approx_kl            | 0.00055274327 |
|    clip_fraction        | 0.0156        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.265        |
|    explained_variance   | 0.000729      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.0066        |
|    n_updates            | 9730          |
|    policy_gradient_loss | -0.000672     |
|    reward               | 0.0047642374  |
|    value_loss           | 0.0152        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.81          |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 975           |
|    time_elapsed         | 6069          |
|    total_timesteps      | 998400        |
| train/                  |               |
|    approx_kl            | 0.00074176764 |
|    clip_fraction        | 0.0129        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.233        |
|    explained_variance   | 0.000407      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00289       |
|    n_updates            | 9740          |
|    policy_gradient_loss | -0.00194      |
|    reward               | 0.0           |
|    value_loss           | 0.0291        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 0.806        |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 976          |
|    time_elapsed         | 6076         |
|    total_timesteps      | 999424       |
| train/                  |              |
|    approx_kl            | 0.0009774028 |
|    clip_fraction        | 0.0107       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.271       |
|    explained_variance   | 0.000226     |
|    learning_rate        | 0.0002       |
|    loss                 | 0.00426      |
|    n_updates            | 9750         |
|    policy_gradient_loss | 5.24e-05     |
|    reward               | 0.036227506  |
|    value_loss           | 0.0193       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 0.806         |
| time/                   |               |
|    fps                  | 164           |
|    iterations           | 977           |
|    time_elapsed         | 6083          |
|    total_timesteps      | 1000448       |
| train/                  |               |
|    approx_kl            | 0.00052788906 |
|    clip_fraction        | 0.0124        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.266        |
|    explained_variance   | 0.000662      |
|    learning_rate        | 0.0002        |
|    loss                 | 0.00102       |
|    n_updates            | 9760          |
|    policy_gradient_loss | -0.000216     |
|    reward               | 0.0073506264  |
|    value_loss           | 0.0224        |
-------------------------------------------


Total trained timestep: 1000448


In [None]:
# Run the agent's prediction pass for the chosen model on the full environment,
# with rendering switched on.
# NOTE(review): `selected_model` is not assigned in the cells visible here --
# presumably it is set in the training cell; confirm it survives Restart & Run All.
# NOTE(review): `full_env` is the same environment the agent was constructed with,
# so this looks like an in-sample evaluation -- confirm that is intended.
agent.predict(
    selected_model,
    full_env,
    render=True,
)