In [1]:
import warnings
# Installed before pandas is imported so FutureWarnings raised from this point on are ignored.
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
# This filter has to come after the pandas import because the category lives in pd.errors.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
# Project-local modules (reloaded with importlib in later cells while iterating on them).
import feature_manager as fma
import rl.env_simple_crypto_trade as env
import importlib
import rl.models as rla
import config as cf
import tr_utils
import torch as th

In [2]:
# Re-import the feature manager so edits made to the module on disk are picked up.
importlib.reload(fma)

# Market and candle resolutions to load.
SYMBOL = "BTCUSDT"
TIMEFRAMES = ["4h", "1d", "1w", "1mo"]

fm = fma.FeatureManager()
fm.import_data(symbol=SYMBOL, timeframes=TIMEFRAMES)

# Four lag entries; presumably one per timeframe above — TODO confirm against FeatureManager.build_features.
fm.build_features(features=cf.DEFAULT_INDICATORS, lags=[1, 1, 1, 1])

Imported observe data 4h from ../data/BTCUSDT-4h.csv with 11760 rows
Imported observe data 1d from ../data/BTCUSDT-1d.csv with 1963 rows
Imported observe data 1w from ../data/BTCUSDT-1w.csv with 247 rows
Imported observe data 1mo from ../data/BTCUSDT-1mo.csv with 65 rows
Calculating external features ...
sma_3_10, sma_7_30, rsi7, rsi14, rsi30, cci7, cci14, cci30, dx7, dx14, dx30, hashrate, fed_rate, gold, nasdaq, sp500, google_trend, 
sma_3_10, sma_7_30, rsi7, rsi14, rsi30, cci7, cci14, cci30, dx7, dx14, dx30, 
sma_3_10, sma_7_30, rsi7, rsi14, rsi30, cci7, cci14, cci30, dx7, dx14, dx30, 
sma_3_10, rsi7, rsi14, cci7, cci14, dx7, dx14, 

Normalizing features with MaxAbs: sma_3_10_level0_lag_1, sma_7_30_level0_lag_1, rsi7_level0_lag_1, rsi14_level0_lag_1, rsi30_level0_lag_1, cci7_level0_lag_1, cci14_level0_lag_1, cci30_level0_lag_1, dx7_level0_lag_1, dx14_level0_lag_1, dx30_level0_lag_1, hashrate_level0_lag_1, fed_rate_level0_lag_1, gold_level0_lag_1, nasdaq_level0_lag_1, sp500_level0_lag

In [3]:
# Window sizes, in rows of fm.df, for the two consecutive tail slices.
N_TRADE_ROWS = 2000
N_TRAIN_ROWS = 2000

# The most recent rows form the trade window; the rows immediately before it form the train window.
trade = fm.df.iloc[-N_TRADE_ROWS:]
train = fm.df.iloc[-(N_TRADE_ROWS + N_TRAIN_ROWS):-N_TRADE_ROWS]

In [4]:
# Pick up on-disk edits to the environment and config modules before building anything.
importlib.reload(env)
importlib.reload(cf)

env_kwargs = cf.TRADE_ENV_PARAMETER
# NOTE(review): the constant 10 presumably counts the non-indicator entries of the
# observation vector — confirm against CryptoTradingEnv.
state_space = 10 + len(fm.cols)


def _build_env(frame):
    """Create a 4h CryptoTradingEnv over `frame` using the shared settings of this cell."""
    return env.CryptoTradingEnv(
        df=frame,
        trade_timeframe="4h",
        indicators=fm.cols,
        state_space=state_space,
        **env_kwargs,
    )


# Same construction order as before: train window, trade window, then the whole frame.
train_env = _build_env(train)
trade_env = _build_env(trade)
full_env = _build_env(fm.df)

In [5]:
# Display the number of entries in the full environment's data (shown as the cell result).
len(full_env.df)

9126

In [6]:
# Re-import the models module so the agent is built from the latest code on disk.
importlib.reload(rla)
# NOTE(review): the agent is bound to full_env, which is built from all of fm.df and
# therefore also contains the rows sliced out as `trade`. Confirm that training on the
# hold-out window is intended; otherwise evaluation on trade_env is not out-of-sample.
agent = rla.DRLTradeAgent(env=full_env)

In [7]:
importlib.reload(rla)

# ---------------------------------------------------------------------------
# Disabled alternative: DQN configuration (kept for reference, not executed).
# ---------------------------------------------------------------------------
# DQN_PARAMS = {
#     "learning_rate": 1e-4,
#     "buffer_size": 100_000,
#     "learning_starts": 100_000,
#     "batch_size": 64,
#     "tau": 1.0,
#     "gamma": 0.9999,
#     "train_freq": 4,
#     "target_update_interval": 10000,
#     "exploration_fraction": 0.3,
#     "exploration_initial_eps": 1.0,
#     "exploration_final_eps": 0.05,
# }
#
# catalog_name = tr_utils.get_name_with_kwargs("dqn", DQN_PARAMS)
#
# dqn_model = agent.get_model(
#     model_name="dqn",
#     model_kwargs=DQN_PARAMS,
#     seed=100,
#     tensorboard_log=catalog_name,
# )

# PPO hyper-parameters. The key order is kept as-is because this dict is also used to
# build the run name below. n_steps is an exact multiple of batch_size (9200 = 40 * 230).
PPO_MODEL_PARAMS = dict(
    n_steps=9200,
    ent_coef=0.02,
    learning_rate=0.00025,
    batch_size=230,
)

# Network layout: one 128-unit layer each for the policy (pi) and value (vf) heads, Tanh activation.
PPO_POLICY_PARAMS = {
    "activation_fn": th.nn.Tanh,
    "net_arch": {"pi": [128], "vf": [128]},
}

# Run name: the experiment tag combined with the hyper-parameters above.
catalog_name = tr_utils.get_name_with_kwargs("add_1layer128_tanh", PPO_MODEL_PARAMS)

ppo_model = agent.get_model(
    model_name="ppo",
    seed=100,
    tensorboard_log=catalog_name,
    model_kwargs=PPO_MODEL_PARAMS,
    policy_kwargs=PPO_POLICY_PARAMS,
)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [8]:
# Display the policy kwargs stored on the model to check that the Tanh / 128-unit setup was applied.
ppo_model.policy_kwargs

{'activation_fn': torch.nn.modules.activation.Tanh,
 'net_arch': {'pi': [128], 'vf': [128]}}

In [9]:
# Choose which of the configured models to train in this run.
selected_model = ppo_model

# Train the chosen model; the name is rebound to whatever train_model returns.
selected_model = agent.train_model(
    model=selected_model,
    catalog_name=catalog_name,
    # Training budget.
    total_timesteps=2_000_000,
    # Checkpointing.
    checkpoint=True,
    save_frequency=20_000,
    # Console feedback.
    progress_bar=True,
)

Logging to ../logs/tensorboard_log/add_1layer128_tanh_n_steps_9200_ent_coef_0.02_learning_rate_0.00025_batch_size_230/PPO_0


Output()

-------------------------------------
| rollout/           |              |
|    ep_len_mean     | 9.13e+03     |
|    ep_rew_mean     | -3.7         |
| time/              |              |
|    fps             | 223          |
|    iterations      | 1            |
|    time_elapsed    | 41           |
|    total_timesteps | 9200         |
| train/             |              |
|    reward          | 0.0023984804 |
-------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | -2.13          |
| time/                   |                |
|    fps                  | 228            |
|    iterations           | 2              |
|    time_elapsed         | 80             |
|    total_timesteps      | 18400          |
| train/                  |                |
|    approx_kl            | 0.0070958612   |
|    clip_fraction        | 0.0371         |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.09          |
|    explained_variance   | 0.0147         |
|    learning_rate        | 0.00025        |
|    loss                 | -0.0254        |
|    n_updates            | 10             |
|    policy_gradient_loss | -0.00111       |
|    reward               | -0.00050012505 |
|    value_loss           | 0.00146        |
--------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -2.88        |
| time/                   |              |
|    fps                  | 227          |
|    iterations           | 3            |
|    time_elapsed         | 121          |
|    total_timesteps      | 27600        |
| train/                  |              |
|    approx_kl            | 0.007623489  |
|    clip_fraction        | 0.0232       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | -5.41e-05    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0195      |
|    n_updates            | 20           |
|    policy_gradient_loss | 8.85e-05     |
|    reward               | 0.0008002301 |
|    value_loss           | 0.00148      |
------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | -3.04          |
| time/                   |                |
|    fps                  | 226            |
|    iterations           | 4              |
|    time_elapsed         | 162            |
|    total_timesteps      | 36800          |
| train/                  |                |
|    approx_kl            | 0.0091490345   |
|    clip_fraction        | 0.0643         |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.07          |
|    explained_variance   | -7.27e-06      |
|    learning_rate        | 0.00025        |
|    loss                 | -0.0264        |
|    n_updates            | 30             |
|    policy_gradient_loss | -0.00222       |
|    reward               | -0.00050012505 |
|    value_loss           | 0.00155        |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -2.79         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 5             |
|    time_elapsed         | 203           |
|    total_timesteps      | 46000         |
| train/                  |               |
|    approx_kl            | 0.009884022   |
|    clip_fraction        | 0.05          |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.06         |
|    explained_variance   | -2.47e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0161       |
|    n_updates            | 40            |
|    policy_gradient_loss | -0.00166      |
|    reward               | 0.00030010505 |
|    value_loss           | 0.00126       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -2.55        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 6            |
|    time_elapsed         | 244          |
|    total_timesteps      | 55200        |
| train/                  |              |
|    approx_kl            | 0.007847625  |
|    clip_fraction        | 0.0294       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.05        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0226      |
|    n_updates            | 50           |
|    policy_gradient_loss | 0.000129     |
|    reward               | -0.010276435 |
|    value_loss           | 0.00128      |
------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | -2.52          |
| time/                   |                |
|    fps                  | 225            |
|    iterations           | 7              |
|    time_elapsed         | 286            |
|    total_timesteps      | 64400          |
| train/                  |                |
|    approx_kl            | 0.008869928    |
|    clip_fraction        | 0.0384         |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.06          |
|    explained_variance   | 9.42e-06       |
|    learning_rate        | 0.00025        |
|    loss                 | -0.0386        |
|    n_updates            | 60             |
|    policy_gradient_loss | -0.000394      |
|    reward               | -0.00050012505 |
|    value_loss           | 0.00105        |
--------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -2.44        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 8            |
|    time_elapsed         | 326          |
|    total_timesteps      | 73600        |
| train/                  |              |
|    approx_kl            | 0.0029715851 |
|    clip_fraction        | 0.0305       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.04        |
|    explained_variance   | -5.96e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0117      |
|    n_updates            | 70           |
|    policy_gradient_loss | -0.000793    |
|    reward               | 0.0          |
|    value_loss           | 0.00113      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -1.94         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 10            |
|    time_elapsed         | 407           |
|    total_timesteps      | 92000         |
| train/                  |               |
|    approx_kl            | 0.010069897   |
|    clip_fraction        | 0.0563        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.977        |
|    explained_variance   | 1.97e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0359       |
|    n_updates            | 90            |
|    policy_gradient_loss | -0.00195      |
|    reward               | -0.0016845608 |
|    value_loss           | 0.00138       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.95       |
| time/                   |             |
|    fps                  | 226         |
|    iterations           | 11          |
|    time_elapsed         | 447         |
|    total_timesteps      | 101200      |
| train/                  |             |
|    approx_kl            | 0.008441758 |
|    clip_fraction        | 0.0152      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.988      |
|    explained_variance   | -4.41e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0098     |
|    n_updates            | 100         |
|    policy_gradient_loss | 0.000531    |
|    reward               | -0.00020002 |
|    value_loss           | 0.0013      |
-----------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | -1.98          |
| time/                   |                |
|    fps                  | 226            |
|    iterations           | 12             |
|    time_elapsed         | 487            |
|    total_timesteps      | 110400         |
| train/                  |                |
|    approx_kl            | 0.003036075    |
|    clip_fraction        | 0.0273         |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.976         |
|    explained_variance   | 2.15e-06       |
|    learning_rate        | 0.00025        |
|    loss                 | -0.0184        |
|    n_updates            | 110            |
|    policy_gradient_loss | 0.000808       |
|    reward               | -0.00050012505 |
|    value_loss           | 0.00152        |
--------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.73       |
| time/                   |             |
|    fps                  | 226         |
|    iterations           | 13          |
|    time_elapsed         | 528         |
|    total_timesteps      | 119600      |
| train/                  |             |
|    approx_kl            | 0.006177504 |
|    clip_fraction        | 0.017       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.967      |
|    explained_variance   | -5.2e-05    |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0151     |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00022    |
|    reward               | -0.00020002 |
|    value_loss           | 0.00142     |
-----------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | -1.79          |
| time/                   |                |
|    fps                  | 226            |
|    iterations           | 14             |
|    time_elapsed         | 569            |
|    total_timesteps      | 128800         |
| train/                  |                |
|    approx_kl            | 0.010221419    |
|    clip_fraction        | 0.0368         |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.993         |
|    explained_variance   | -8.34e-07      |
|    learning_rate        | 0.00025        |
|    loss                 | -0.0154        |
|    n_updates            | 130            |
|    policy_gradient_loss | 0.000114       |
|    reward               | -0.00050012505 |
|    value_loss           | 0.00165        |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -1.81         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 15            |
|    time_elapsed         | 610           |
|    total_timesteps      | 138000        |
| train/                  |               |
|    approx_kl            | 0.005033227   |
|    clip_fraction        | 0.0223        |
|    clip_range           | 0.2           |
|    entropy_loss         | -1            |
|    explained_variance   | 7.33e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0164       |
|    n_updates            | 140           |
|    policy_gradient_loss | 0.000579      |
|    reward               | -0.0023813723 |
|    value_loss           | 0.00149       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.83        |
| time/                   |              |
|    fps                  | 226          |
|    iterations           | 16           |
|    time_elapsed         | 650          |
|    total_timesteps      | 147200       |
| train/                  |              |
|    approx_kl            | 0.011540016  |
|    clip_fraction        | 0.0191       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.03        |
|    explained_variance   | -2.98e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00538     |
|    n_updates            | 150          |
|    policy_gradient_loss | 0.000694     |
|    reward               | 0.0036370656 |
|    value_loss           | 0.00144      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.79       |
| time/                   |             |
|    fps                  | 226         |
|    iterations           | 17          |
|    time_elapsed         | 691         |
|    total_timesteps      | 156400      |
| train/                  |             |
|    approx_kl            | 0.011006454 |
|    clip_fraction        | 0.0196      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.06       |
|    explained_variance   | -7.87e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0159     |
|    n_updates            | 160         |
|    policy_gradient_loss | 0.000236    |
|    reward               | -0.00020002 |
|    value_loss           | 0.0012      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.68        |
| time/                   |              |
|    fps                  | 226          |
|    iterations           | 18           |
|    time_elapsed         | 732          |
|    total_timesteps      | 165600       |
| train/                  |              |
|    approx_kl            | 0.010124672  |
|    clip_fraction        | 0.0681       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | -9.54e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0137      |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.00255     |
|    reward               | -0.014226776 |
|    value_loss           | 0.00133      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.71       |
| time/                   |             |
|    fps                  | 226         |
|    iterations           | 19          |
|    time_elapsed         | 772         |
|    total_timesteps      | 174800      |
| train/                  |             |
|    approx_kl            | 0.012229174 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.988      |
|    explained_variance   | -5.36e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0257     |
|    n_updates            | 180         |
|    policy_gradient_loss | -0.00421    |
|    reward               | 0.0         |
|    value_loss           | 0.00132     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.76       |
| time/                   |             |
|    fps                  | 226         |
|    iterations           | 20          |
|    time_elapsed         | 813         |
|    total_timesteps      | 184000      |
| train/                  |             |
|    approx_kl            | 0.011769752 |
|    clip_fraction        | 0.0364      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.948      |
|    explained_variance   | -7.87e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0127     |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00142    |
|    reward               | 0.0         |
|    value_loss           | 0.0014      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.73        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 21           |
|    time_elapsed         | 855          |
|    total_timesteps      | 193200       |
| train/                  |              |
|    approx_kl            | 0.011178408  |
|    clip_fraction        | 0.039        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.878       |
|    explained_variance   | -3.81e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00236      |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00178     |
|    reward               | 0.0041807606 |
|    value_loss           | 0.00173      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.68        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 22           |
|    time_elapsed         | 895          |
|    total_timesteps      | 202400       |
| train/                  |              |
|    approx_kl            | 0.00846768   |
|    clip_fraction        | 0.055        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.784       |
|    explained_variance   | 7.57e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0129      |
|    n_updates            | 210          |
|    policy_gradient_loss | -0.00408     |
|    reward               | -0.010345752 |
|    value_loss           | 0.00152      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.63       |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 23          |
|    time_elapsed         | 938         |
|    total_timesteps      | 211600      |
| train/                  |             |
|    approx_kl            | 0.005180389 |
|    clip_fraction        | 0.0378      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.715      |
|    explained_variance   | -5.48e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0178     |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00284    |
|    reward               | 0.0         |
|    value_loss           | 0.00168     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.62        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 24           |
|    time_elapsed         | 979          |
|    total_timesteps      | 220800       |
| train/                  |              |
|    approx_kl            | 0.0044993344 |
|    clip_fraction        | 0.0326       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.635       |
|    explained_variance   | -3.58e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00728     |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.00233     |
|    reward               | 0.002883734  |
|    value_loss           | 0.00165      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.54       |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 25          |
|    time_elapsed         | 1020        |
|    total_timesteps      | 230000      |
| train/                  |             |
|    approx_kl            | 0.004138636 |
|    clip_fraction        | 0.0379      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.565      |
|    explained_variance   | -5.96e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0105     |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.00296    |
|    reward               | 0.006479005 |
|    value_loss           | 0.00209     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.49        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 26           |
|    time_elapsed         | 1061         |
|    total_timesteps      | 239200       |
| train/                  |              |
|    approx_kl            | 0.0022729072 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.52        |
|    explained_variance   | 3.04e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00735     |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.00246     |
|    reward               | 0.001283672  |
|    value_loss           | 0.0018       |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.44        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 27           |
|    time_elapsed         | 1101         |
|    total_timesteps      | 248400       |
| train/                  |              |
|    approx_kl            | 0.0036374473 |
|    clip_fraction        | 0.0327       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.452       |
|    explained_variance   | 3.99e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00552     |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.00312     |
|    reward               | 0.002153409  |
|    value_loss           | 0.00232      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.35       |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 28          |
|    time_elapsed         | 1142        |
|    total_timesteps      | 257600      |
| train/                  |             |
|    approx_kl            | 0.002499077 |
|    clip_fraction        | 0.0278      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.382      |
|    explained_variance   | -4.29e-06   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0147     |
|    n_updates            | 270         |
|    policy_gradient_loss | -0.003      |
|    reward               | 0.001874291 |
|    value_loss           | 0.00258     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.28       |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 29          |
|    time_elapsed         | 1182        |
|    total_timesteps      | 266800      |
| train/                  |             |
|    approx_kl            | 0.002157845 |
|    clip_fraction        | 0.0228      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.332      |
|    explained_variance   | 1.49e-06    |
|    learning_rate        | 0.00025     |
|    loss                 | -0.00802    |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.00265    |
|    reward               | 0.06642396  |
|    value_loss           | 0.00265     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.23        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 30           |
|    time_elapsed         | 1223         |
|    total_timesteps      | 276000       |
| train/                  |              |
|    approx_kl            | 0.0012012112 |
|    clip_fraction        | 0.0178       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.299       |
|    explained_variance   | 5.01e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00598     |
|    n_updates            | 290          |
|    policy_gradient_loss | -0.00215     |
|    reward               | 0.0044003725 |
|    value_loss           | 0.0033       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -1.17       |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 31          |
|    time_elapsed         | 1264        |
|    total_timesteps      | 285200      |
| train/                  |             |
|    approx_kl            | 0.001472356 |
|    clip_fraction        | 0.0209      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.283      |
|    explained_variance   | 1.79e-06    |
|    learning_rate        | 0.00025     |
|    loss                 | -0.00734    |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.00229    |
|    reward               | 0.0         |
|    value_loss           | 0.0033      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.13        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 32           |
|    time_elapsed         | 1305         |
|    total_timesteps      | 294400       |
| train/                  |              |
|    approx_kl            | 0.0010954898 |
|    clip_fraction        | 0.0138       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | -2.5e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00345     |
|    n_updates            | 310          |
|    policy_gradient_loss | -0.00171     |
|    reward               | 0.0          |
|    value_loss           | 0.00336      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -1.08         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 33            |
|    time_elapsed         | 1345          |
|    total_timesteps      | 303600        |
| train/                  |               |
|    approx_kl            | 0.001059171   |
|    clip_fraction        | 0.0143        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.206        |
|    explained_variance   | 2.62e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00264      |
|    n_updates            | 320           |
|    policy_gradient_loss | -0.00168      |
|    reward               | 0.00057178887 |
|    value_loss           | 0.00416       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -1.02        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 34           |
|    time_elapsed         | 1386         |
|    total_timesteps      | 312800       |
| train/                  |              |
|    approx_kl            | 0.0007254907 |
|    clip_fraction        | 0.00743      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.181       |
|    explained_variance   | 4.77e-07     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00125     |
|    n_updates            | 330          |
|    policy_gradient_loss | -0.00091     |
|    reward               | 0.0032755255 |
|    value_loss           | 0.00419      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.956        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 35            |
|    time_elapsed         | 1426          |
|    total_timesteps      | 322000        |
| train/                  |               |
|    approx_kl            | 0.00060870603 |
|    clip_fraction        | 0.00826       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.165        |
|    explained_variance   | -1.79e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00114       |
|    n_updates            | 340           |
|    policy_gradient_loss | -0.00109      |
|    reward               | 0.0           |
|    value_loss           | 0.0042        |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.892       |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 36           |
|    time_elapsed         | 1466         |
|    total_timesteps      | 331200       |
| train/                  |              |
|    approx_kl            | 0.0004008237 |
|    clip_fraction        | 0.00408      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.143       |
|    explained_variance   | 1.01e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00556     |
|    n_updates            | 350          |
|    policy_gradient_loss | -0.000342    |
|    reward               | 0.005824222  |
|    value_loss           | 0.00547      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.847        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 37            |
|    time_elapsed         | 1508          |
|    total_timesteps      | 340400        |
| train/                  |               |
|    approx_kl            | 0.00025706095 |
|    clip_fraction        | 0.00587       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.123        |
|    explained_variance   | 1.85e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00251       |
|    n_updates            | 360           |
|    policy_gradient_loss | -0.000637     |
|    reward               | 0.0           |
|    value_loss           | 0.00476       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.803       |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 38           |
|    time_elapsed         | 1551         |
|    total_timesteps      | 349600       |
| train/                  |              |
|    approx_kl            | 0.0003634903 |
|    clip_fraction        | 0.00539      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.115       |
|    explained_variance   | 3.28e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00108      |
|    n_updates            | 370          |
|    policy_gradient_loss | -0.000318    |
|    reward               | 0.009157956  |
|    value_loss           | 0.00559      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.767        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 39            |
|    time_elapsed         | 1591          |
|    total_timesteps      | 358800        |
| train/                  |               |
|    approx_kl            | 0.00014038882 |
|    clip_fraction        | 0.00338       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | 3.87e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00031       |
|    n_updates            | 380           |
|    policy_gradient_loss | -0.000102     |
|    reward               | -0.36770755   |
|    value_loss           | 0.00654       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.735        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 40            |
|    time_elapsed         | 1633          |
|    total_timesteps      | 368000        |
| train/                  |               |
|    approx_kl            | 0.00020235412 |
|    clip_fraction        | 0.00224       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | -2.62e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00133       |
|    n_updates            | 390           |
|    policy_gradient_loss | 0.000274      |
|    reward               | -0.01327892   |
|    value_loss           | 0.00593       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.699        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 41            |
|    time_elapsed         | 1673          |
|    total_timesteps      | 377200        |
| train/                  |               |
|    approx_kl            | 0.00018882532 |
|    clip_fraction        | 0.00553       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | -3.58e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00261      |
|    n_updates            | 400           |
|    policy_gradient_loss | -0.000329     |
|    reward               | 0.00060683844 |
|    value_loss           | 0.00586       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.656        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 42            |
|    time_elapsed         | 1713          |
|    total_timesteps      | 386400        |
| train/                  |               |
|    approx_kl            | 0.00044493098 |
|    clip_fraction        | 0.00953       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.116        |
|    explained_variance   | 1.28e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00181      |
|    n_updates            | 410           |
|    policy_gradient_loss | -0.00103      |
|    reward               | 0.0           |
|    value_loss           | 0.00526       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.623        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 43            |
|    time_elapsed         | 1753          |
|    total_timesteps      | 395600        |
| train/                  |               |
|    approx_kl            | 0.00044309124 |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.11         |
|    explained_variance   | 6.44e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00408      |
|    n_updates            | 420           |
|    policy_gradient_loss | -0.000277     |
|    reward               | 0.0           |
|    value_loss           | 0.00465       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.585        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 44            |
|    time_elapsed         | 1794          |
|    total_timesteps      | 404800        |
| train/                  |               |
|    approx_kl            | 0.00030003994 |
|    clip_fraction        | 0.00449       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.11         |
|    explained_variance   | 1.13e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000613      |
|    n_updates            | 430           |
|    policy_gradient_loss | -8.28e-05     |
|    reward               | 0.0026624363  |
|    value_loss           | 0.00662       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.56         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 45            |
|    time_elapsed         | 1834          |
|    total_timesteps      | 414000        |
| train/                  |               |
|    approx_kl            | 0.00013631901 |
|    clip_fraction        | 0.00373       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.111        |
|    explained_variance   | 1.19e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00199      |
|    n_updates            | 440           |
|    policy_gradient_loss | 0.000148      |
|    reward               | 0.0           |
|    value_loss           | 0.00518       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.54         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 46            |
|    time_elapsed         | 1875          |
|    total_timesteps      | 423200        |
| train/                  |               |
|    approx_kl            | 0.00023372509 |
|    clip_fraction        | 0.00522       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.102        |
|    explained_variance   | 1.07e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00198      |
|    n_updates            | 450           |
|    policy_gradient_loss | -0.000219     |
|    reward               | 0.008304726   |
|    value_loss           | 0.00549       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.516        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 47            |
|    time_elapsed         | 1916          |
|    total_timesteps      | 432400        |
| train/                  |               |
|    approx_kl            | 0.00028484315 |
|    clip_fraction        | 0.00796       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.113        |
|    explained_variance   | 4.17e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000523      |
|    n_updates            | 460           |
|    policy_gradient_loss | -0.000608     |
|    reward               | 0.001682756   |
|    value_loss           | 0.00563       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.496        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 48            |
|    time_elapsed         | 1956          |
|    total_timesteps      | 441600        |
| train/                  |               |
|    approx_kl            | 0.00033617206 |
|    clip_fraction        | 0.00387       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0965       |
|    explained_variance   | -2.38e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00161      |
|    n_updates            | 470           |
|    policy_gradient_loss | -0.00041      |
|    reward               | 0.012938665   |
|    value_loss           | 0.00635       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.468        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 49            |
|    time_elapsed         | 1997          |
|    total_timesteps      | 450800        |
| train/                  |               |
|    approx_kl            | 4.7008645e-05 |
|    clip_fraction        | 0.00307       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | -3.34e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000822      |
|    n_updates            | 480           |
|    policy_gradient_loss | 0.000242      |
|    reward               | 0.0029595234  |
|    value_loss           | 0.00582       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.441       |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 50           |
|    time_elapsed         | 2038         |
|    total_timesteps      | 460000       |
| train/                  |              |
|    approx_kl            | 0.0002661318 |
|    clip_fraction        | 0.00551      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.111       |
|    explained_variance   | -2.15e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | 0.0012       |
|    n_updates            | 490          |
|    policy_gradient_loss | -7.18e-05    |
|    reward               | 0.0          |
|    value_loss           | 0.00665      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.423       |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 51           |
|    time_elapsed         | 2078         |
|    total_timesteps      | 469200       |
| train/                  |              |
|    approx_kl            | 0.0003449269 |
|    clip_fraction        | 0.00475      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0995      |
|    explained_variance   | 2.38e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0028      |
|    n_updates            | 500          |
|    policy_gradient_loss | -0.000567    |
|    reward               | 0.0          |
|    value_loss           | 0.00526      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.399        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 52            |
|    time_elapsed         | 2120          |
|    total_timesteps      | 478400        |
| train/                  |               |
|    approx_kl            | 0.00018319425 |
|    clip_fraction        | 0.00298       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.096        |
|    explained_variance   | 1.13e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000928     |
|    n_updates            | 510           |
|    policy_gradient_loss | -5.93e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.00597       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.371        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 53            |
|    time_elapsed         | 2161          |
|    total_timesteps      | 487600        |
| train/                  |               |
|    approx_kl            | 0.00032480518 |
|    clip_fraction        | 0.00671       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0897       |
|    explained_variance   | 6.79e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00241      |
|    n_updates            | 520           |
|    policy_gradient_loss | -0.000496     |
|    reward               | 0.0           |
|    value_loss           | 0.00525       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.347        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 54            |
|    time_elapsed         | 2204          |
|    total_timesteps      | 496800        |
| train/                  |               |
|    approx_kl            | 0.00016153498 |
|    clip_fraction        | 0.00198       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0919       |
|    explained_variance   | 9.6e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00234       |
|    n_updates            | 530           |
|    policy_gradient_loss | 0.000259      |
|    reward               | 0.0           |
|    value_loss           | 0.00596       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.327        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 55            |
|    time_elapsed         | 2245          |
|    total_timesteps      | 506000        |
| train/                  |               |
|    approx_kl            | 0.00024171786 |
|    clip_fraction        | 0.00497       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0898       |
|    explained_variance   | 5.3e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00398       |
|    n_updates            | 540           |
|    policy_gradient_loss | -0.000242     |
|    reward               | 0.0015166347  |
|    value_loss           | 0.00504       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.308        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 56            |
|    time_elapsed         | 2288          |
|    total_timesteps      | 515200        |
| train/                  |               |
|    approx_kl            | 0.00022168113 |
|    clip_fraction        | 0.004         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | 3.4e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00244       |
|    n_updates            | 550           |
|    policy_gradient_loss | -4.06e-05     |
|    reward               | 0.0035152535  |
|    value_loss           | 0.00641       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.288        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 57            |
|    time_elapsed         | 2328          |
|    total_timesteps      | 524400        |
| train/                  |               |
|    approx_kl            | 0.00029415634 |
|    clip_fraction        | 0.0036        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.103        |
|    explained_variance   | 2.68e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00121      |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000553     |
|    reward               | 0.0           |
|    value_loss           | 0.00504       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.266        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 58            |
|    time_elapsed         | 2369          |
|    total_timesteps      | 533600        |
| train/                  |               |
|    approx_kl            | 0.00045657004 |
|    clip_fraction        | 0.00521       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0897       |
|    explained_variance   | 4.05e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00694      |
|    n_updates            | 570           |
|    policy_gradient_loss | -0.000588     |
|    reward               | 0.0           |
|    value_loss           | 0.00535       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.252       |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 59           |
|    time_elapsed         | 2409         |
|    total_timesteps      | 542800       |
| train/                  |              |
|    approx_kl            | 0.0001776482 |
|    clip_fraction        | 0.00338      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0771      |
|    explained_variance   | 6.32e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00137     |
|    n_updates            | 580          |
|    policy_gradient_loss | -0.000217    |
|    reward               | 0.0037137077 |
|    value_loss           | 0.00588      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.23         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 60            |
|    time_elapsed         | 2450          |
|    total_timesteps      | 552000        |
| train/                  |               |
|    approx_kl            | 0.00029975263 |
|    clip_fraction        | 0.00616       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0832       |
|    explained_variance   | 3.7e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000517     |
|    n_updates            | 590           |
|    policy_gradient_loss | -0.000265     |
|    reward               | -0.00020002   |
|    value_loss           | 0.00574       |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | -0.212         |
| time/                   |                |
|    fps                  | 225            |
|    iterations           | 61             |
|    time_elapsed         | 2490           |
|    total_timesteps      | 561200         |
| train/                  |                |
|    approx_kl            | 0.000119656965 |
|    clip_fraction        | 0.00386        |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.0765        |
|    explained_variance   | -0.0127        |
|    learning_rate        | 0.00025        |
|    loss                 | 0.00219        |
|    n_updates            | 600            |
|    policy_gradient_loss | -0.000155      |
|    reward               | 0.0053267293   |
|    value_loss           | 0.00634        |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.195        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 62            |
|    time_elapsed         | 2531          |
|    total_timesteps      | 570400        |
| train/                  |               |
|    approx_kl            | 0.0001905661  |
|    clip_fraction        | 0.00474       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0809       |
|    explained_variance   | 5.42e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000578     |
|    n_updates            | 610           |
|    policy_gradient_loss | -8.08e-05     |
|    reward               | 0.00039597208 |
|    value_loss           | 0.00608       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.186        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 63            |
|    time_elapsed         | 2573          |
|    total_timesteps      | 579600        |
| train/                  |               |
|    approx_kl            | 0.00020383974 |
|    clip_fraction        | 0.00608       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0961       |
|    explained_variance   | 1.13e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000711      |
|    n_updates            | 620           |
|    policy_gradient_loss | -0.000398     |
|    reward               | -0.0022638112 |
|    value_loss           | 0.00587       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.17        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 64           |
|    time_elapsed         | 2615         |
|    total_timesteps      | 588800       |
| train/                  |              |
|    approx_kl            | 0.0002918251 |
|    clip_fraction        | 0.00486      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.105       |
|    explained_variance   | 4.35e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00126      |
|    n_updates            | 630          |
|    policy_gradient_loss | -9.99e-05    |
|    reward               | -0.021435104 |
|    value_loss           | 0.00635      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.158        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 65            |
|    time_elapsed         | 2655          |
|    total_timesteps      | 598000        |
| train/                  |               |
|    approx_kl            | 0.00017718395 |
|    clip_fraction        | 0.0037        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0967       |
|    explained_variance   | 8.7e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00434       |
|    n_updates            | 640           |
|    policy_gradient_loss | -0.000498     |
|    reward               | 0.019023553   |
|    value_loss           | 0.0054        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.139        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 66            |
|    time_elapsed         | 2696          |
|    total_timesteps      | 607200        |
| train/                  |               |
|    approx_kl            | 0.00013727874 |
|    clip_fraction        | 0.00315       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.103        |
|    explained_variance   | 3.22e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00131       |
|    n_updates            | 650           |
|    policy_gradient_loss | 0.000164      |
|    reward               | 0.011210056   |
|    value_loss           | 0.00572       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.106        |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 68            |
|    time_elapsed         | 2778          |
|    total_timesteps      | 625600        |
| train/                  |               |
|    approx_kl            | 0.00014494863 |
|    clip_fraction        | 0.00297       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | 3.64e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0031        |
|    n_updates            | 670           |
|    policy_gradient_loss | 0.000143      |
|    reward               | 0.016942177   |
|    value_loss           | 0.00502       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.0897       |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 69            |
|    time_elapsed         | 2819          |
|    total_timesteps      | 634800        |
| train/                  |               |
|    approx_kl            | 0.00039359712 |
|    clip_fraction        | 0.0048        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0946       |
|    explained_variance   | 3.46e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00223      |
|    n_updates            | 680           |
|    policy_gradient_loss | -0.000275     |
|    reward               | -0.013666893  |
|    value_loss           | 0.00621       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.0752       |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 70            |
|    time_elapsed         | 2860          |
|    total_timesteps      | 644000        |
| train/                  |               |
|    approx_kl            | 0.00014841821 |
|    clip_fraction        | 0.00317       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | 6.79e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0013        |
|    n_updates            | 690           |
|    policy_gradient_loss | 0.000138      |
|    reward               | -0.0033962564 |
|    value_loss           | 0.00695       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.0644       |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 71            |
|    time_elapsed         | 2901          |
|    total_timesteps      | 653200        |
| train/                  |               |
|    approx_kl            | 0.00017126538 |
|    clip_fraction        | 0.00402       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.112        |
|    explained_variance   | 3.34e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00351       |
|    n_updates            | 700           |
|    policy_gradient_loss | 0.00015       |
|    reward               | 0.007904889   |
|    value_loss           | 0.00549       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | -0.0516     |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 72          |
|    time_elapsed         | 2943        |
|    total_timesteps      | 662400      |
| train/                  |             |
|    approx_kl            | 0.000320899 |
|    clip_fraction        | 0.00295     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.12       |
|    explained_variance   | 3.93e-06    |
|    learning_rate        | 0.00025     |
|    loss                 | -0.00243    |
|    n_updates            | 710         |
|    policy_gradient_loss | 8.86e-05    |
|    reward               | 0.0093953   |
|    value_loss           | 0.00569     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | -0.0362      |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 73           |
|    time_elapsed         | 2984         |
|    total_timesteps      | 671600       |
| train/                  |              |
|    approx_kl            | 0.0002321263 |
|    clip_fraction        | 0.00376      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.11        |
|    explained_variance   | 3.81e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00178      |
|    n_updates            | 720          |
|    policy_gradient_loss | -0.000177    |
|    reward               | 0.030192085  |
|    value_loss           | 0.00596      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.0278       |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 74            |
|    time_elapsed         | 3025          |
|    total_timesteps      | 680800        |
| train/                  |               |
|    approx_kl            | 0.00036143293 |
|    clip_fraction        | 0.00577       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 3.99e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00504       |
|    n_updates            | 730           |
|    policy_gradient_loss | -0.00014      |
|    reward               | 0.0038275106  |
|    value_loss           | 0.00703       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | -0.00946      |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 75            |
|    time_elapsed         | 3068          |
|    total_timesteps      | 690000        |
| train/                  |               |
|    approx_kl            | 4.817249e-05  |
|    clip_fraction        | 0.0024        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.097        |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00129       |
|    n_updates            | 740           |
|    policy_gradient_loss | -8.89e-06     |
|    reward               | -0.0047428976 |
|    value_loss           | 0.00597       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.000361      |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 76            |
|    time_elapsed         | 3109          |
|    total_timesteps      | 699200        |
| train/                  |               |
|    approx_kl            | 0.00013136017 |
|    clip_fraction        | 0.00229       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0948       |
|    explained_variance   | 6.56e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00013      |
|    n_updates            | 750           |
|    policy_gradient_loss | 0.000141      |
|    reward               | 0.032661267   |
|    value_loss           | 0.00601       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0133        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 77            |
|    time_elapsed         | 3150          |
|    total_timesteps      | 708400        |
| train/                  |               |
|    approx_kl            | 0.00020409194 |
|    clip_fraction        | 0.00284       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0896       |
|    explained_variance   | 2.5e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00327      |
|    n_updates            | 760           |
|    policy_gradient_loss | -3.04e-05     |
|    reward               | 0.03231121    |
|    value_loss           | 0.00694       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0227        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 78            |
|    time_elapsed         | 3191          |
|    total_timesteps      | 717600        |
| train/                  |               |
|    approx_kl            | 0.00013436613 |
|    clip_fraction        | 0.00384       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.103        |
|    explained_variance   | -3.58e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 8.48e-05      |
|    n_updates            | 770           |
|    policy_gradient_loss | 5.27e-05      |
|    reward               | -0.029769393  |
|    value_loss           | 0.0056        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0315        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 79            |
|    time_elapsed         | 3232          |
|    total_timesteps      | 726800        |
| train/                  |               |
|    approx_kl            | 0.00012157396 |
|    clip_fraction        | 0.00251       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 3.04e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00313       |
|    n_updates            | 780           |
|    policy_gradient_loss | 0.000181      |
|    reward               | 0.002826031   |
|    value_loss           | 0.0048        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0443        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 80            |
|    time_elapsed         | 3274          |
|    total_timesteps      | 736000        |
| train/                  |               |
|    approx_kl            | 0.00046437708 |
|    clip_fraction        | 0.00563       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.108        |
|    explained_variance   | 1.07e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00277       |
|    n_updates            | 790           |
|    policy_gradient_loss | -0.000442     |
|    reward               | 0.004116332   |
|    value_loss           | 0.00559       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0438        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 81            |
|    time_elapsed         | 3315          |
|    total_timesteps      | 745200        |
| train/                  |               |
|    approx_kl            | 0.00049951917 |
|    clip_fraction        | 0.00787       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0941       |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00329      |
|    n_updates            | 800           |
|    policy_gradient_loss | -0.000839     |
|    reward               | 0.008167051   |
|    value_loss           | 0.00604       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.0505       |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 82           |
|    time_elapsed         | 3356         |
|    total_timesteps      | 754400       |
| train/                  |              |
|    approx_kl            | 3.862607e-05 |
|    clip_fraction        | 0.0025       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.106       |
|    explained_variance   | 2.44e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.000364     |
|    n_updates            | 810          |
|    policy_gradient_loss | 0.000208     |
|    reward               | 0.042721204  |
|    value_loss           | 0.0057       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0595        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 83            |
|    time_elapsed         | 3397          |
|    total_timesteps      | 763600        |
| train/                  |               |
|    approx_kl            | 0.00014434138 |
|    clip_fraction        | 0.00337       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.103        |
|    explained_variance   | 2.62e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0013        |
|    n_updates            | 820           |
|    policy_gradient_loss | 0.000178      |
|    reward               | -0.00020002   |
|    value_loss           | 0.00573       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0673        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 84            |
|    time_elapsed         | 3438          |
|    total_timesteps      | 772800        |
| train/                  |               |
|    approx_kl            | 0.00021686396 |
|    clip_fraction        | 0.003         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | -1.19e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0011        |
|    n_updates            | 830           |
|    policy_gradient_loss | 0.000132      |
|    reward               | 0.0041970033  |
|    value_loss           | 0.00597       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0726        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 85            |
|    time_elapsed         | 3479          |
|    total_timesteps      | 782000        |
| train/                  |               |
|    approx_kl            | 0.00017930244 |
|    clip_fraction        | 0.00498       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0969       |
|    explained_variance   | 1.91e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00212      |
|    n_updates            | 840           |
|    policy_gradient_loss | -0.000137     |
|    reward               | 0.007370395   |
|    value_loss           | 0.00735       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0841        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 86            |
|    time_elapsed         | 3519          |
|    total_timesteps      | 791200        |
| train/                  |               |
|    approx_kl            | 0.00025310658 |
|    clip_fraction        | 0.0044        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.11         |
|    explained_variance   | 2.8e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00127      |
|    n_updates            | 850           |
|    policy_gradient_loss | 0.000141      |
|    reward               | -0.007174203  |
|    value_loss           | 0.00568       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.0901        |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 87            |
|    time_elapsed         | 3560          |
|    total_timesteps      | 800400        |
| train/                  |               |
|    approx_kl            | 0.00043964377 |
|    clip_fraction        | 0.00567       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.12         |
|    explained_variance   | 1.01e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00464       |
|    n_updates            | 860           |
|    policy_gradient_loss | -0.000312     |
|    reward               | 0.0016270535  |
|    value_loss           | 0.00571       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.109         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 89            |
|    time_elapsed         | 3642          |
|    total_timesteps      | 818800        |
| train/                  |               |
|    approx_kl            | 0.00032488347 |
|    clip_fraction        | 0.00363       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.102        |
|    explained_variance   | 1.85e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 6.05e-05      |
|    n_updates            | 880           |
|    policy_gradient_loss | -0.000288     |
|    reward               | 0.008784773   |
|    value_loss           | 0.00532       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.115        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 90           |
|    time_elapsed         | 3683         |
|    total_timesteps      | 828000       |
| train/                  |              |
|    approx_kl            | 0.0001512939 |
|    clip_fraction        | 0.00353      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0988      |
|    explained_variance   | 2.98e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00129      |
|    n_updates            | 890          |
|    policy_gradient_loss | -0.000138    |
|    reward               | 0.0          |
|    value_loss           | 0.0055       |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.121         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 91            |
|    time_elapsed         | 3724          |
|    total_timesteps      | 837200        |
| train/                  |               |
|    approx_kl            | 0.00035621383 |
|    clip_fraction        | 0.00314       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0847       |
|    explained_variance   | 8.94e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000657      |
|    n_updates            | 900           |
|    policy_gradient_loss | -0.000165     |
|    reward               | 0.0           |
|    value_loss           | 0.00577       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.13          |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 92            |
|    time_elapsed         | 3765          |
|    total_timesteps      | 846400        |
| train/                  |               |
|    approx_kl            | 0.00029857483 |
|    clip_fraction        | 0.00433       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0939       |
|    explained_variance   | 3.46e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00618       |
|    n_updates            | 910           |
|    policy_gradient_loss | 0.000253      |
|    reward               | 0.0022410539  |
|    value_loss           | 0.00527       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.136         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 93            |
|    time_elapsed         | 3807          |
|    total_timesteps      | 855600        |
| train/                  |               |
|    approx_kl            | 0.00028554426 |
|    clip_fraction        | 0.00188       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 8.58e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00148       |
|    n_updates            | 920           |
|    policy_gradient_loss | 0.000219      |
|    reward               | 0.0           |
|    value_loss           | 0.00589       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.14          |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 94            |
|    time_elapsed         | 3848          |
|    total_timesteps      | 864800        |
| train/                  |               |
|    approx_kl            | 0.00035656258 |
|    clip_fraction        | 0.00665       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.122        |
|    explained_variance   | 8.88e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000533     |
|    n_updates            | 930           |
|    policy_gradient_loss | -7.87e-05     |
|    reward               | 0.0035460233  |
|    value_loss           | 0.0056        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.143         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 95            |
|    time_elapsed         | 3889          |
|    total_timesteps      | 874000        |
| train/                  |               |
|    approx_kl            | 0.00060030894 |
|    clip_fraction        | 0.00668       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.114        |
|    explained_variance   | -9.54e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000241     |
|    n_updates            | 940           |
|    policy_gradient_loss | -0.000655     |
|    reward               | 0.0           |
|    value_loss           | 0.00526       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.144         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 96            |
|    time_elapsed         | 3932          |
|    total_timesteps      | 883200        |
| train/                  |               |
|    approx_kl            | 0.00019582894 |
|    clip_fraction        | 0.00427       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.13         |
|    explained_variance   | 1.25e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00128       |
|    n_updates            | 950           |
|    policy_gradient_loss | 0.000111      |
|    reward               | 0.000505021   |
|    value_loss           | 0.00568       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.154         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 97            |
|    time_elapsed         | 3973          |
|    total_timesteps      | 892400        |
| train/                  |               |
|    approx_kl            | 0.00033842478 |
|    clip_fraction        | 0.00499       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.132        |
|    explained_variance   | 1.94e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00328       |
|    n_updates            | 960           |
|    policy_gradient_loss | -0.000269     |
|    reward               | 0.015012812   |
|    value_loss           | 0.00499       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.164         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 98            |
|    time_elapsed         | 4015          |
|    total_timesteps      | 901600        |
| train/                  |               |
|    approx_kl            | 0.00033871765 |
|    clip_fraction        | 0.00372       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.125        |
|    explained_variance   | -7.15e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000512     |
|    n_updates            | 970           |
|    policy_gradient_loss | -5e-05        |
|    reward               | 0.0072503444  |
|    value_loss           | 0.00549       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.166         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 99            |
|    time_elapsed         | 4055          |
|    total_timesteps      | 910800        |
| train/                  |               |
|    approx_kl            | 0.00041108043 |
|    clip_fraction        | 0.00428       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.102        |
|    explained_variance   | 1.28e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00126      |
|    n_updates            | 980           |
|    policy_gradient_loss | -0.00035      |
|    reward               | 0.0           |
|    value_loss           | 0.00469       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.171         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 100           |
|    time_elapsed         | 4096          |
|    total_timesteps      | 920000        |
| train/                  |               |
|    approx_kl            | 0.00010428273 |
|    clip_fraction        | 0.00293       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | 9.54e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00148       |
|    n_updates            | 990           |
|    policy_gradient_loss | 0.000161      |
|    reward               | 0.0024144945  |
|    value_loss           | 0.0063        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.215         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 101           |
|    time_elapsed         | 4137          |
|    total_timesteps      | 929200        |
| train/                  |               |
|    approx_kl            | 0.00035094764 |
|    clip_fraction        | 0.00308       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 6.68e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00584      |
|    n_updates            | 1000          |
|    policy_gradient_loss | -9.43e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.00621       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.229         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 102           |
|    time_elapsed         | 4178          |
|    total_timesteps      | 938400        |
| train/                  |               |
|    approx_kl            | 0.0001377173  |
|    clip_fraction        | 0.00343       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | 8.34e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0015       |
|    n_updates            | 1010          |
|    policy_gradient_loss | -1.37e-05     |
|    reward               | 2.4927951e-05 |
|    value_loss           | 0.00556       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.281        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 103          |
|    time_elapsed         | 4220         |
|    total_timesteps      | 947600       |
| train/                  |              |
|    approx_kl            | 7.155732e-05 |
|    clip_fraction        | 0.00142      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.103       |
|    explained_variance   | 4.23e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00117      |
|    n_updates            | 1020         |
|    policy_gradient_loss | 0.000234     |
|    reward               | 0.008812328  |
|    value_loss           | 0.00645      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | 0.324       |
| time/                   |             |
|    fps                  | 224         |
|    iterations           | 104         |
|    time_elapsed         | 4261        |
|    total_timesteps      | 956800      |
| train/                  |             |
|    approx_kl            | 0.000237488 |
|    clip_fraction        | 0.00278     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.11       |
|    explained_variance   | 1.73e-06    |
|    learning_rate        | 0.00025     |
|    loss                 | 0.00249     |
|    n_updates            | 1030        |
|    policy_gradient_loss | 7.94e-05    |
|    reward               | 0.0         |
|    value_loss           | 0.00509     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.352        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 105          |
|    time_elapsed         | 4303         |
|    total_timesteps      | 966000       |
| train/                  |              |
|    approx_kl            | 8.416591e-05 |
|    clip_fraction        | 0.00422      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.111       |
|    explained_variance   | 4.29e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.000849     |
|    n_updates            | 1040         |
|    policy_gradient_loss | 8.94e-06     |
|    reward               | 0.0          |
|    value_loss           | 0.00555      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.373         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 106           |
|    time_elapsed         | 4344          |
|    total_timesteps      | 975200        |
| train/                  |               |
|    approx_kl            | 0.00032046987 |
|    clip_fraction        | 0.00446       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.114        |
|    explained_variance   | 3.16e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0068        |
|    n_updates            | 1050          |
|    policy_gradient_loss | -0.000161     |
|    reward               | 0.0036156056  |
|    value_loss           | 0.00556       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.4          |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 107          |
|    time_elapsed         | 4384         |
|    total_timesteps      | 984400       |
| train/                  |              |
|    approx_kl            | 0.0001711556 |
|    clip_fraction        | 0.00211      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.119       |
|    explained_variance   | 3.46e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00206     |
|    n_updates            | 1060         |
|    policy_gradient_loss | 0.000236     |
|    reward               | 0.0          |
|    value_loss           | 0.00513      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.446         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 109           |
|    time_elapsed         | 4467          |
|    total_timesteps      | 1002800       |
| train/                  |               |
|    approx_kl            | 0.00020400892 |
|    clip_fraction        | 0.00223       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 1.49e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000108      |
|    n_updates            | 1080          |
|    policy_gradient_loss | 0.000145      |
|    reward               | -0.010835367  |
|    value_loss           | 0.00476       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.469         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 111           |
|    time_elapsed         | 4551          |
|    total_timesteps      | 1021200       |
| train/                  |               |
|    approx_kl            | 0.00031368533 |
|    clip_fraction        | 0.00367       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0961       |
|    explained_variance   | 4.11e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00153       |
|    n_updates            | 1100          |
|    policy_gradient_loss | -0.000242     |
|    reward               | 0.0           |
|    value_loss           | 0.00558       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.5           |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 112           |
|    time_elapsed         | 4595          |
|    total_timesteps      | 1030400       |
| train/                  |               |
|    approx_kl            | 0.00010060986 |
|    clip_fraction        | 0.00209       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0926       |
|    explained_variance   | 3.7e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00159       |
|    n_updates            | 1110          |
|    policy_gradient_loss | 0.000123      |
|    reward               | 0.0           |
|    value_loss           | 0.00613       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.5           |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 113           |
|    time_elapsed         | 4636          |
|    total_timesteps      | 1039600       |
| train/                  |               |
|    approx_kl            | 0.00020408115 |
|    clip_fraction        | 0.00442       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0991       |
|    explained_variance   | -1.31e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00218       |
|    n_updates            | 1120          |
|    policy_gradient_loss | -6.04e-05     |
|    reward               | -0.0072550173 |
|    value_loss           | 0.00555       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.532         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 114           |
|    time_elapsed         | 4676          |
|    total_timesteps      | 1048800       |
| train/                  |               |
|    approx_kl            | 0.00020547281 |
|    clip_fraction        | 0.00304       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.105        |
|    explained_variance   | 3.28e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000369      |
|    n_updates            | 1130          |
|    policy_gradient_loss | 0.000337      |
|    reward               | 0.0           |
|    value_loss           | 0.00604       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.591        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 116          |
|    time_elapsed         | 4757         |
|    total_timesteps      | 1067200      |
| train/                  |              |
|    approx_kl            | 0.0002497037 |
|    clip_fraction        | 0.00605      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.121       |
|    explained_variance   | 5.19e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000329    |
|    n_updates            | 1150         |
|    policy_gradient_loss | -4.48e-05    |
|    reward               | 0.0054692095 |
|    value_loss           | 0.00563      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.607         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 117           |
|    time_elapsed         | 4799          |
|    total_timesteps      | 1076400       |
| train/                  |               |
|    approx_kl            | 0.00016453551 |
|    clip_fraction        | 0.00253       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.113        |
|    explained_variance   | 4.23e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000302     |
|    n_updates            | 1160          |
|    policy_gradient_loss | 7.3e-06       |
|    reward               | 0.0011284616  |
|    value_loss           | 0.00538       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.611        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 118          |
|    time_elapsed         | 4841         |
|    total_timesteps      | 1085600      |
| train/                  |              |
|    approx_kl            | 0.0004609605 |
|    clip_fraction        | 0.00527      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.111       |
|    explained_variance   | 2.21e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000507    |
|    n_updates            | 1170         |
|    policy_gradient_loss | -0.000273    |
|    reward               | 0.0          |
|    value_loss           | 0.00491      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.643         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 119           |
|    time_elapsed         | 4881          |
|    total_timesteps      | 1094800       |
| train/                  |               |
|    approx_kl            | 0.00043458733 |
|    clip_fraction        | 0.00649       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.102        |
|    explained_variance   | 7.75e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000375      |
|    n_updates            | 1180          |
|    policy_gradient_loss | -0.000508     |
|    reward               | -0.15957735   |
|    value_loss           | 0.00537       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.68          |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 120           |
|    time_elapsed         | 4922          |
|    total_timesteps      | 1104000       |
| train/                  |               |
|    approx_kl            | 0.00011791168 |
|    clip_fraction        | 0.0045        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.1          |
|    explained_variance   | -5.48e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00181      |
|    n_updates            | 1190          |
|    policy_gradient_loss | -0.000171     |
|    reward               | 0.0           |
|    value_loss           | 0.0064        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.699         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 121           |
|    time_elapsed         | 4963          |
|    total_timesteps      | 1113200       |
| train/                  |               |
|    approx_kl            | 0.00016303985 |
|    clip_fraction        | 0.00502       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0978       |
|    explained_variance   | 4.65e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000236     |
|    n_updates            | 1200          |
|    policy_gradient_loss | -0.000336     |
|    reward               | 0.0           |
|    value_loss           | 0.00587       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.717         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 122           |
|    time_elapsed         | 5005          |
|    total_timesteps      | 1122400       |
| train/                  |               |
|    approx_kl            | 0.00034444494 |
|    clip_fraction        | 0.00439       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.096        |
|    explained_variance   | 5.42e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0039       |
|    n_updates            | 1210          |
|    policy_gradient_loss | -0.000162     |
|    reward               | 0.00068136264 |
|    value_loss           | 0.00549       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.76          |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 124           |
|    time_elapsed         | 5087          |
|    total_timesteps      | 1140800       |
| train/                  |               |
|    approx_kl            | 0.00025180724 |
|    clip_fraction        | 0.00259       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | 4.89e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000316      |
|    n_updates            | 1230          |
|    policy_gradient_loss | 0.000236      |
|    reward               | 0.0           |
|    value_loss           | 0.00558       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.775         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 127           |
|    time_elapsed         | 5213          |
|    total_timesteps      | 1168400       |
| train/                  |               |
|    approx_kl            | 0.00013748417 |
|    clip_fraction        | 0.0043        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 3.28e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000529     |
|    n_updates            | 1260          |
|    policy_gradient_loss | 6.64e-05      |
|    reward               | 0.0           |
|    value_loss           | 0.00615       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.774         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 128           |
|    time_elapsed         | 5254          |
|    total_timesteps      | 1177600       |
| train/                  |               |
|    approx_kl            | 0.00017236763 |
|    clip_fraction        | 0.00487       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.126        |
|    explained_variance   | -1.57e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | -3.63e-05     |
|    n_updates            | 1270          |
|    policy_gradient_loss | 0.000111      |
|    reward               | 0.004634679   |
|    value_loss           | 0.00484       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.781         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 129           |
|    time_elapsed         | 5294          |
|    total_timesteps      | 1186800       |
| train/                  |               |
|    approx_kl            | 0.00018484975 |
|    clip_fraction        | 0.00271       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.117        |
|    explained_variance   | 4.71e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000109     |
|    n_updates            | 1280          |
|    policy_gradient_loss | -1.36e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.00492       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.795        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 131          |
|    time_elapsed         | 5375         |
|    total_timesteps      | 1205200      |
| train/                  |              |
|    approx_kl            | 0.0001672099 |
|    clip_fraction        | 0.00264      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.117       |
|    explained_variance   | 7.09e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00223      |
|    n_updates            | 1300         |
|    policy_gradient_loss | 0.000261     |
|    reward               | 0.003261728  |
|    value_loss           | 0.00525      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.805         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 132           |
|    time_elapsed         | 5413          |
|    total_timesteps      | 1214400       |
| train/                  |               |
|    approx_kl            | 0.00017657323 |
|    clip_fraction        | 0.00253       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.11         |
|    explained_variance   | 4.23e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000242      |
|    n_updates            | 1310          |
|    policy_gradient_loss | -0.000103     |
|    reward               | 0.0           |
|    value_loss           | 0.00502       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.801         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 133           |
|    time_elapsed         | 5451          |
|    total_timesteps      | 1223600       |
| train/                  |               |
|    approx_kl            | 0.00025131082 |
|    clip_fraction        | 0.00347       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000696      |
|    n_updates            | 1320          |
|    policy_gradient_loss | -0.000144     |
|    reward               | 0.004463303   |
|    value_loss           | 0.00529       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 134          |
|    time_elapsed         | 5491         |
|    total_timesteps      | 1232800      |
| train/                  |              |
|    approx_kl            | 0.0002232403 |
|    clip_fraction        | 0.00226      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0911      |
|    explained_variance   | 1.01e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -4.44e-05    |
|    n_updates            | 1330         |
|    policy_gradient_loss | 0.000106     |
|    reward               | 0.0011952716 |
|    value_loss           | 0.00691      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.798        |
| time/                   |              |
|    fps                  | 224          |
|    iterations           | 135          |
|    time_elapsed         | 5528         |
|    total_timesteps      | 1242000      |
| train/                  |              |
|    approx_kl            | 0.0002798813 |
|    clip_fraction        | 0.00376      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0953      |
|    explained_variance   | -1.55e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00277     |
|    n_updates            | 1340         |
|    policy_gradient_loss | 0.000194     |
|    reward               | 0.0          |
|    value_loss           | 0.00566      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.795         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 136           |
|    time_elapsed         | 5566          |
|    total_timesteps      | 1251200       |
| train/                  |               |
|    approx_kl            | 0.00019199388 |
|    clip_fraction        | 0.00322       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0967       |
|    explained_variance   | 3.34e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00292       |
|    n_updates            | 1350          |
|    policy_gradient_loss | 6.43e-05      |
|    reward               | 0.008417647   |
|    value_loss           | 0.00582       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.799         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 138           |
|    time_elapsed         | 5646          |
|    total_timesteps      | 1269600       |
| train/                  |               |
|    approx_kl            | 0.00030638362 |
|    clip_fraction        | 0.00659       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | 5.25e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00464       |
|    n_updates            | 1370          |
|    policy_gradient_loss | -0.000354     |
|    reward               | 0.0014681484  |
|    value_loss           | 0.00509       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.803         |
| time/                   |               |
|    fps                  | 224           |
|    iterations           | 139           |
|    time_elapsed         | 5684          |
|    total_timesteps      | 1278800       |
| train/                  |               |
|    approx_kl            | 7.3252646e-05 |
|    clip_fraction        | 0.00312       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | 7.99e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000679      |
|    n_updates            | 1380          |
|    policy_gradient_loss | 2.54e-05      |
|    reward               | 0.0514369     |
|    value_loss           | 0.00475       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.803        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 140          |
|    time_elapsed         | 5722         |
|    total_timesteps      | 1288000      |
| train/                  |              |
|    approx_kl            | 0.0002687131 |
|    clip_fraction        | 0.00258      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.116       |
|    explained_variance   | -2.26e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000569    |
|    n_updates            | 1390         |
|    policy_gradient_loss | 0.000273     |
|    reward               | 0.0          |
|    value_loss           | 0.00631      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.802         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 141           |
|    time_elapsed         | 5761          |
|    total_timesteps      | 1297200       |
| train/                  |               |
|    approx_kl            | 0.00013631265 |
|    clip_fraction        | 0.00215       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.108        |
|    explained_variance   | 3.22e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0024       |
|    n_updates            | 1400          |
|    policy_gradient_loss | 3.08e-05      |
|    reward               | 0.0010198713  |
|    value_loss           | 0.00549       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.806         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 142           |
|    time_elapsed         | 5801          |
|    total_timesteps      | 1306400       |
| train/                  |               |
|    approx_kl            | 0.00027020148 |
|    clip_fraction        | 0.002         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.097        |
|    explained_variance   | -8.34e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00819       |
|    n_updates            | 1410          |
|    policy_gradient_loss | -2.43e-05     |
|    reward               | 0.007370641   |
|    value_loss           | 0.00591       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.806         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 143           |
|    time_elapsed         | 5843          |
|    total_timesteps      | 1315600       |
| train/                  |               |
|    approx_kl            | 0.00017014191 |
|    clip_fraction        | 0.0032        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0952       |
|    explained_variance   | 7.09e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00113      |
|    n_updates            | 1420          |
|    policy_gradient_loss | 0.000146      |
|    reward               | 0.011072427   |
|    value_loss           | 0.00609       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.806         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 144           |
|    time_elapsed         | 5884          |
|    total_timesteps      | 1324800       |
| train/                  |               |
|    approx_kl            | 5.3461048e-05 |
|    clip_fraction        | 0.0021        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0944       |
|    explained_variance   | 9.78e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000422      |
|    n_updates            | 1430          |
|    policy_gradient_loss | 0.000323      |
|    reward               | 0.006463265   |
|    value_loss           | 0.00619       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.812         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 145           |
|    time_elapsed         | 5926          |
|    total_timesteps      | 1334000       |
| train/                  |               |
|    approx_kl            | 0.00026983555 |
|    clip_fraction        | 0.00517       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0894       |
|    explained_variance   | -2.86e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00151      |
|    n_updates            | 1440          |
|    policy_gradient_loss | -0.000414     |
|    reward               | 0.025203634   |
|    value_loss           | 0.00632       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.823         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 147           |
|    time_elapsed         | 6007          |
|    total_timesteps      | 1352400       |
| train/                  |               |
|    approx_kl            | 0.00027722828 |
|    clip_fraction        | 0.00139       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | 4.71e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00536       |
|    n_updates            | 1460          |
|    policy_gradient_loss | 0.000252      |
|    reward               | 0.0022116252  |
|    value_loss           | 0.00496       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.827         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 148           |
|    time_elapsed         | 6048          |
|    total_timesteps      | 1361600       |
| train/                  |               |
|    approx_kl            | 0.00033304296 |
|    clip_fraction        | 0.00379       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0937       |
|    explained_variance   | 1.73e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00469       |
|    n_updates            | 1470          |
|    policy_gradient_loss | -0.000343     |
|    reward               | 0.0003167819  |
|    value_loss           | 0.00597       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.825         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 149           |
|    time_elapsed         | 6086          |
|    total_timesteps      | 1370800       |
| train/                  |               |
|    approx_kl            | 0.00012474632 |
|    clip_fraction        | 0.00264       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0923       |
|    explained_variance   | 4.17e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000715      |
|    n_updates            | 1480          |
|    policy_gradient_loss | 0.000179      |
|    reward               | 0.0054802718  |
|    value_loss           | 0.00655       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.828         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 150           |
|    time_elapsed         | 6128          |
|    total_timesteps      | 1380000       |
| train/                  |               |
|    approx_kl            | 0.00022469503 |
|    clip_fraction        | 0.00464       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.108        |
|    explained_variance   | 3.7e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000174      |
|    n_updates            | 1490          |
|    policy_gradient_loss | -3.35e-05     |
|    reward               | -0.0091384575 |
|    value_loss           | 0.00671       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.828         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 151           |
|    time_elapsed         | 6167          |
|    total_timesteps      | 1389200       |
| train/                  |               |
|    approx_kl            | 0.00041393004 |
|    clip_fraction        | 0.00522       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 6.56e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00126       |
|    n_updates            | 1500          |
|    policy_gradient_loss | -0.000119     |
|    reward               | 0.0066083875  |
|    value_loss           | 0.00591       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 152          |
|    time_elapsed         | 6207         |
|    total_timesteps      | 1398400      |
| train/                  |              |
|    approx_kl            | 0.0002561532 |
|    clip_fraction        | 0.00195      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0903      |
|    explained_variance   | 6.68e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00263      |
|    n_updates            | 1510         |
|    policy_gradient_loss | 1.35e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.00604      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.831         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 153           |
|    time_elapsed         | 6247          |
|    total_timesteps      | 1407600       |
| train/                  |               |
|    approx_kl            | 0.00012648033 |
|    clip_fraction        | 0.00186       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0884       |
|    explained_variance   | 6.02e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00574      |
|    n_updates            | 1520          |
|    policy_gradient_loss | 0.00029       |
|    reward               | -0.026725043  |
|    value_loss           | 0.00535       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.832         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 154           |
|    time_elapsed         | 6287          |
|    total_timesteps      | 1416800       |
| train/                  |               |
|    approx_kl            | 0.00026815222 |
|    clip_fraction        | 0.00424       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.089        |
|    explained_variance   | 4.17e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00147      |
|    n_updates            | 1530          |
|    policy_gradient_loss | -0.000168     |
|    reward               | 0.0017461127  |
|    value_loss           | 0.00578       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.829         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 157           |
|    time_elapsed         | 6411          |
|    total_timesteps      | 1444400       |
| train/                  |               |
|    approx_kl            | 0.00012648653 |
|    clip_fraction        | 0.00335       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.113        |
|    explained_variance   | 4.65e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000439     |
|    n_updates            | 1560          |
|    policy_gradient_loss | 0.000109      |
|    reward               | 0.005605255   |
|    value_loss           | 0.00582       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.837         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 158           |
|    time_elapsed         | 6450          |
|    total_timesteps      | 1453600       |
| train/                  |               |
|    approx_kl            | 0.00023660716 |
|    clip_fraction        | 0.00491       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.113        |
|    explained_variance   | 5.01e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000881     |
|    n_updates            | 1570          |
|    policy_gradient_loss | -0.000261     |
|    reward               | 0.0           |
|    value_loss           | 0.00546       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 159          |
|    time_elapsed         | 6490         |
|    total_timesteps      | 1462800      |
| train/                  |              |
|    approx_kl            | 0.0003065767 |
|    clip_fraction        | 0.00554      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.114       |
|    explained_variance   | 1.67e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00411     |
|    n_updates            | 1580         |
|    policy_gradient_loss | 2.38e-05     |
|    reward               | 0.006125948  |
|    value_loss           | 0.00531      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 160          |
|    time_elapsed         | 6531         |
|    total_timesteps      | 1472000      |
| train/                  |              |
|    approx_kl            | 0.0003609131 |
|    clip_fraction        | 0.00221      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.118       |
|    explained_variance   | 8.34e-07     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00085      |
|    n_updates            | 1590         |
|    policy_gradient_loss | 4.57e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.00521      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.824         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 161           |
|    time_elapsed         | 6572          |
|    total_timesteps      | 1481200       |
| train/                  |               |
|    approx_kl            | 0.00024802936 |
|    clip_fraction        | 0.00328       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.112        |
|    explained_variance   | -1.07e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00115       |
|    n_updates            | 1600          |
|    policy_gradient_loss | -3.77e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.00581       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.13e+03    |
|    ep_rew_mean          | 0.833       |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 162         |
|    time_elapsed         | 6613        |
|    total_timesteps      | 1490400     |
| train/                  |             |
|    approx_kl            | 0.000250845 |
|    clip_fraction        | 0.00463     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.124      |
|    explained_variance   | 1.79e-06    |
|    learning_rate        | 0.00025     |
|    loss                 | 0.00081     |
|    n_updates            | 1610        |
|    policy_gradient_loss | -0.000147   |
|    reward               | 0.0         |
|    value_loss           | 0.00581     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 163          |
|    time_elapsed         | 6653         |
|    total_timesteps      | 1499600      |
| train/                  |              |
|    approx_kl            | 9.753686e-05 |
|    clip_fraction        | 0.00423      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.117       |
|    explained_variance   | 4.41e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000839    |
|    n_updates            | 1620         |
|    policy_gradient_loss | 0.00038      |
|    reward               | 0.020589542  |
|    value_loss           | 0.00522      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 164          |
|    time_elapsed         | 6695         |
|    total_timesteps      | 1508800      |
| train/                  |              |
|    approx_kl            | 0.0002673283 |
|    clip_fraction        | 0.00691      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.119       |
|    explained_variance   | -9.54e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00147     |
|    n_updates            | 1630         |
|    policy_gradient_loss | -0.000639    |
|    reward               | 0.0020316453 |
|    value_loss           | 0.00533      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.83         |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 165          |
|    time_elapsed         | 6735         |
|    total_timesteps      | 1518000      |
| train/                  |              |
|    approx_kl            | 0.0003501981 |
|    clip_fraction        | 0.00408      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.117       |
|    explained_variance   | 7.39e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00193     |
|    n_updates            | 1640         |
|    policy_gradient_loss | -0.000268    |
|    reward               | 0.019327585  |
|    value_loss           | 0.00564      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.825         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 166           |
|    time_elapsed         | 6778          |
|    total_timesteps      | 1527200       |
| train/                  |               |
|    approx_kl            | 0.00037156028 |
|    clip_fraction        | 0.00366       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.108        |
|    explained_variance   | -4.77e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00139       |
|    n_updates            | 1650          |
|    policy_gradient_loss | 5.09e-06      |
|    reward               | 0.00019760216 |
|    value_loss           | 0.00533       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.822         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 167           |
|    time_elapsed         | 6818          |
|    total_timesteps      | 1536400       |
| train/                  |               |
|    approx_kl            | 7.609388e-05  |
|    clip_fraction        | 0.00274       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.11         |
|    explained_variance   | 3.76e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00273       |
|    n_updates            | 1660          |
|    policy_gradient_loss | 0.000303      |
|    reward               | -0.0011824257 |
|    value_loss           | 0.0049        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.824         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 168           |
|    time_elapsed         | 6857          |
|    total_timesteps      | 1545600       |
| train/                  |               |
|    approx_kl            | 0.00018341381 |
|    clip_fraction        | 0.0051        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.111        |
|    explained_variance   | 1.67e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000777     |
|    n_updates            | 1670          |
|    policy_gradient_loss | -0.000262     |
|    reward               | 0.0           |
|    value_loss           | 0.00582       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.82          |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 169           |
|    time_elapsed         | 6897          |
|    total_timesteps      | 1554800       |
| train/                  |               |
|    approx_kl            | 0.00016913944 |
|    clip_fraction        | 0.00268       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0977       |
|    explained_variance   | 2.26e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000808     |
|    n_updates            | 1680          |
|    policy_gradient_loss | 0.000105      |
|    reward               | -0.0170855    |
|    value_loss           | 0.00546       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.82         |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 170          |
|    time_elapsed         | 6940         |
|    total_timesteps      | 1564000      |
| train/                  |              |
|    approx_kl            | 0.0003629458 |
|    clip_fraction        | 0.00491      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0982      |
|    explained_variance   | 5.25e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000449    |
|    n_updates            | 1690         |
|    policy_gradient_loss | -0.000198    |
|    reward               | 0.0          |
|    value_loss           | 0.00603      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.82         |
| time/                   |              |
|    fps                  | 225          |
|    iterations           | 171          |
|    time_elapsed         | 6980         |
|    total_timesteps      | 1573200      |
| train/                  |              |
|    approx_kl            | 0.0001339978 |
|    clip_fraction        | 0.00235      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0883      |
|    explained_variance   | 2.68e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00264      |
|    n_updates            | 1700         |
|    policy_gradient_loss | 2.3e-05      |
|    reward               | 0.008017286  |
|    value_loss           | 0.00679      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.814         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 172           |
|    time_elapsed         | 7021          |
|    total_timesteps      | 1582400       |
| train/                  |               |
|    approx_kl            | 0.00032523455 |
|    clip_fraction        | 0.00465       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0914       |
|    explained_variance   | 5.6e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000318      |
|    n_updates            | 1710          |
|    policy_gradient_loss | -0.000209     |
|    reward               | 0.0           |
|    value_loss           | 0.0056        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.813         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 174           |
|    time_elapsed         | 7098          |
|    total_timesteps      | 1600800       |
| train/                  |               |
|    approx_kl            | 0.00022217799 |
|    clip_fraction        | 0.00525       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0949       |
|    explained_variance   | -2.86e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000328      |
|    n_updates            | 1730          |
|    policy_gradient_loss | -0.000288     |
|    reward               | 0.0           |
|    value_loss           | 0.00508       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.812         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 175           |
|    time_elapsed         | 7142          |
|    total_timesteps      | 1610000       |
| train/                  |               |
|    approx_kl            | 0.00022650519 |
|    clip_fraction        | 0.00413       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0929       |
|    explained_variance   | 4.47e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00218      |
|    n_updates            | 1740          |
|    policy_gradient_loss | 5.77e-05      |
|    reward               | 0.0070671597  |
|    value_loss           | 0.00673       |
-------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.13e+03   |
|    ep_rew_mean          | 0.811      |
| time/                   |            |
|    fps                  | 225        |
|    iterations           | 176        |
|    time_elapsed         | 7180       |
|    total_timesteps      | 1619200    |
| train/                  |            |
|    approx_kl            | 0.00017131 |
|    clip_fraction        | 0.00164    |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0981    |
|    explained_variance   | 1.24e-05   |
|    learning_rate        | 0.00025    |
|    loss                 | 0.00193    |
|    n_updates            | 1750       |
|    policy_gradient_loss | 0.000248   |
|    reward               | 0.0        |
|    value_loss           | 0.00558    |
----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.811         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 177           |
|    time_elapsed         | 7220          |
|    total_timesteps      | 1628400       |
| train/                  |               |
|    approx_kl            | 0.00024436763 |
|    clip_fraction        | 0.00336       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.105        |
|    explained_variance   | 6.44e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00208      |
|    n_updates            | 1760          |
|    policy_gradient_loss | 0.000157      |
|    reward               | 0.0           |
|    value_loss           | 0.00574       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.81          |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 178           |
|    time_elapsed         | 7263          |
|    total_timesteps      | 1637600       |
| train/                  |               |
|    approx_kl            | 0.00033086212 |
|    clip_fraction        | 0.00327       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.111        |
|    explained_variance   | 1.43e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00133       |
|    n_updates            | 1770          |
|    policy_gradient_loss | 3.97e-05      |
|    reward               | 0.0031570983  |
|    value_loss           | 0.0062        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.82          |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 180           |
|    time_elapsed         | 7339          |
|    total_timesteps      | 1656000       |
| train/                  |               |
|    approx_kl            | 0.00016676707 |
|    clip_fraction        | 0.00253       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 3.34e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000123      |
|    n_updates            | 1790          |
|    policy_gradient_loss | 0.00017       |
|    reward               | 0.0018012712  |
|    value_loss           | 0.00644       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.82          |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 181           |
|    time_elapsed         | 7378          |
|    total_timesteps      | 1665200       |
| train/                  |               |
|    approx_kl            | 0.00018397548 |
|    clip_fraction        | 0.00127       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0967       |
|    explained_variance   | 6.62e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00114       |
|    n_updates            | 1800          |
|    policy_gradient_loss | 0.000114      |
|    reward               | 0.0026962715  |
|    value_loss           | 0.00581       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.819         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 183           |
|    time_elapsed         | 7457          |
|    total_timesteps      | 1683600       |
| train/                  |               |
|    approx_kl            | 0.00040191173 |
|    clip_fraction        | 0.00188       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.109        |
|    explained_variance   | 7.39e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000977     |
|    n_updates            | 1820          |
|    policy_gradient_loss | 7.99e-05      |
|    reward               | -0.002454096  |
|    value_loss           | 0.00607       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.822         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 184           |
|    time_elapsed         | 7495          |
|    total_timesteps      | 1692800       |
| train/                  |               |
|    approx_kl            | 0.00015880994 |
|    clip_fraction        | 0.0042        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | 3.58e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00359       |
|    n_updates            | 1830          |
|    policy_gradient_loss | -3.94e-05     |
|    reward               | 0.005864779   |
|    value_loss           | 0.00594       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.819         |
| time/                   |               |
|    fps                  | 225           |
|    iterations           | 185           |
|    time_elapsed         | 7531          |
|    total_timesteps      | 1702000       |
| train/                  |               |
|    approx_kl            | 0.00019446266 |
|    clip_fraction        | 0.00241       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.1          |
|    explained_variance   | -4.77e-07     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000507     |
|    n_updates            | 1840          |
|    policy_gradient_loss | 0.000238      |
|    reward               | -0.011109954  |
|    value_loss           | 0.00716       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.816         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 187           |
|    time_elapsed         | 7611          |
|    total_timesteps      | 1720400       |
| train/                  |               |
|    approx_kl            | 8.5634616e-05 |
|    clip_fraction        | 0.00368       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0985       |
|    explained_variance   | 2.38e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00153       |
|    n_updates            | 1860          |
|    policy_gradient_loss | 9.83e-05      |
|    reward               | 0.0018094745  |
|    value_loss           | 0.00603       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.82          |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 188           |
|    time_elapsed         | 7651          |
|    total_timesteps      | 1729600       |
| train/                  |               |
|    approx_kl            | 0.00026726065 |
|    clip_fraction        | 0.0035        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.103        |
|    explained_variance   | 3.16e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00266       |
|    n_updates            | 1870          |
|    policy_gradient_loss | -9.97e-05     |
|    reward               | 0.0021125076  |
|    value_loss           | 0.00718       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.819         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 189           |
|    time_elapsed         | 7689          |
|    total_timesteps      | 1738800       |
| train/                  |               |
|    approx_kl            | 0.00031932804 |
|    clip_fraction        | 0.00334       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.105        |
|    explained_variance   | 3.28e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000895      |
|    n_updates            | 1880          |
|    policy_gradient_loss | -6.47e-05     |
|    reward               | -0.025705155  |
|    value_loss           | 0.00502       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.827         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 190           |
|    time_elapsed         | 7727          |
|    total_timesteps      | 1748000       |
| train/                  |               |
|    approx_kl            | 0.00020752214 |
|    clip_fraction        | 0.00342       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | 2.5e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000289     |
|    n_updates            | 1890          |
|    policy_gradient_loss | -0.00014      |
|    reward               | 0.010043862   |
|    value_loss           | 0.00528       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.825         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 191           |
|    time_elapsed         | 7766          |
|    total_timesteps      | 1757200       |
| train/                  |               |
|    approx_kl            | 0.00030748462 |
|    clip_fraction        | 0.0046        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.102        |
|    explained_variance   | 3.1e-06       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00149       |
|    n_updates            | 1900          |
|    policy_gradient_loss | -0.000302     |
|    reward               | -0.0048207687 |
|    value_loss           | 0.00552       |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | 0.823          |
| time/                   |                |
|    fps                  | 226            |
|    iterations           | 192            |
|    time_elapsed         | 7805           |
|    total_timesteps      | 1766400        |
| train/                  |                |
|    approx_kl            | 0.000101327736 |
|    clip_fraction        | 0.00243        |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.0926        |
|    explained_variance   | 9.12e-06       |
|    learning_rate        | 0.00025        |
|    loss                 | 0.000671       |
|    n_updates            | 1910           |
|    policy_gradient_loss | 0.000104       |
|    reward               | 0.004890887    |
|    value_loss           | 0.00571        |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.822         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 194           |
|    time_elapsed         | 7881          |
|    total_timesteps      | 1784800       |
| train/                  |               |
|    approx_kl            | 0.00026757928 |
|    clip_fraction        | 0.00448       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0995       |
|    explained_variance   | 5.66e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000902     |
|    n_updates            | 1930          |
|    policy_gradient_loss | 0.000116      |
|    reward               | 0.0           |
|    value_loss           | 0.00558       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.829         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 195           |
|    time_elapsed         | 7920          |
|    total_timesteps      | 1794000       |
| train/                  |               |
|    approx_kl            | 0.00031391595 |
|    clip_fraction        | 0.00765       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0985       |
|    explained_variance   | 4.95e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000882      |
|    n_updates            | 1940          |
|    policy_gradient_loss | -0.000989     |
|    reward               | 0.006921929   |
|    value_loss           | 0.00619       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.825         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 196           |
|    time_elapsed         | 7958          |
|    total_timesteps      | 1803200       |
| train/                  |               |
|    approx_kl            | 0.00017928811 |
|    clip_fraction        | 0.00345       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0934       |
|    explained_variance   | -5.13e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | -1.44e-06     |
|    n_updates            | 1950          |
|    policy_gradient_loss | -0.000196     |
|    reward               | -0.010094597  |
|    value_loss           | 0.00562       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.821         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 197           |
|    time_elapsed         | 7998          |
|    total_timesteps      | 1812400       |
| train/                  |               |
|    approx_kl            | 0.00017705598 |
|    clip_fraction        | 0.00509       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.105        |
|    explained_variance   | 3.34e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -2.77e-05     |
|    n_updates            | 1960          |
|    policy_gradient_loss | -0.000161     |
|    reward               | 0.017612334   |
|    value_loss           | 0.00593       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.827         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 198           |
|    time_elapsed         | 8037          |
|    total_timesteps      | 1821600       |
| train/                  |               |
|    approx_kl            | 0.00018738808 |
|    clip_fraction        | 0.00241       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0981       |
|    explained_variance   | 5.42e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 2.39e-05      |
|    n_updates            | 1970          |
|    policy_gradient_loss | 0.000104      |
|    reward               | 0.0           |
|    value_loss           | 0.00654       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.835         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 200           |
|    time_elapsed         | 8114          |
|    total_timesteps      | 1840000       |
| train/                  |               |
|    approx_kl            | 0.0001675041  |
|    clip_fraction        | 0.00396       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0824       |
|    explained_variance   | 4.41e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 9.07e-05      |
|    n_updates            | 1990          |
|    policy_gradient_loss | 0.000146      |
|    reward               | -0.0044107544 |
|    value_loss           | 0.00607       |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | 0.833          |
| time/                   |                |
|    fps                  | 226            |
|    iterations           | 201            |
|    time_elapsed         | 8154           |
|    total_timesteps      | 1849200        |
| train/                  |                |
|    approx_kl            | 0.000120801385 |
|    clip_fraction        | 0.0036         |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.0865        |
|    explained_variance   | 3.93e-06       |
|    learning_rate        | 0.00025        |
|    loss                 | 0.000894       |
|    n_updates            | 2000           |
|    policy_gradient_loss | -0.000171      |
|    reward               | 0.012030009    |
|    value_loss           | 0.00661        |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.837         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 202           |
|    time_elapsed         | 8193          |
|    total_timesteps      | 1858400       |
| train/                  |               |
|    approx_kl            | 0.00026733548 |
|    clip_fraction        | 0.00129       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0829       |
|    explained_variance   | 9.18e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0012        |
|    n_updates            | 2010          |
|    policy_gradient_loss | 0.000274      |
|    reward               | 0.018059436   |
|    value_loss           | 0.00628       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.839         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 203           |
|    time_elapsed         | 8233          |
|    total_timesteps      | 1867600       |
| train/                  |               |
|    approx_kl            | 0.00018231361 |
|    clip_fraction        | 0.00263       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0947       |
|    explained_variance   | 4.77e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00442       |
|    n_updates            | 2020          |
|    policy_gradient_loss | 0.000102      |
|    reward               | 0.0           |
|    value_loss           | 0.00613       |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 9.13e+03       |
|    ep_rew_mean          | 0.841          |
| time/                   |                |
|    fps                  | 226            |
|    iterations           | 204            |
|    time_elapsed         | 8276           |
|    total_timesteps      | 1876800        |
| train/                  |                |
|    approx_kl            | 0.000106039806 |
|    clip_fraction        | 0.00374        |
|    clip_range           | 0.2            |
|    entropy_loss         | -0.0884        |
|    explained_variance   | 1.17e-05       |
|    learning_rate        | 0.00025        |
|    loss                 | 0.00228        |
|    n_updates            | 2030           |
|    policy_gradient_loss | -0.000372      |
|    reward               | 0.0009598529   |
|    value_loss           | 0.00533        |
--------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 226          |
|    iterations           | 205          |
|    time_elapsed         | 8316         |
|    total_timesteps      | 1886000      |
| train/                  |              |
|    approx_kl            | 8.348265e-05 |
|    clip_fraction        | 0.0029       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0893      |
|    explained_variance   | 3.58e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00183      |
|    n_updates            | 2040         |
|    policy_gradient_loss | 4.9e-05      |
|    reward               | 0.0          |
|    value_loss           | 0.00637      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.843         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 206           |
|    time_elapsed         | 8354          |
|    total_timesteps      | 1895200       |
| train/                  |               |
|    approx_kl            | 0.00022595008 |
|    clip_fraction        | 0.00266       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0923       |
|    explained_variance   | 1.79e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00118       |
|    n_updates            | 2050          |
|    policy_gradient_loss | 0.000277      |
|    reward               | -0.0063926703 |
|    value_loss           | 0.00545       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 226          |
|    iterations           | 207          |
|    time_elapsed         | 8394         |
|    total_timesteps      | 1904400      |
| train/                  |              |
|    approx_kl            | 7.176007e-05 |
|    clip_fraction        | 0.00268      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.097       |
|    explained_variance   | 4.23e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 7.39e-05     |
|    n_updates            | 2060         |
|    policy_gradient_loss | 0.00021      |
|    reward               | 0.0          |
|    value_loss           | 0.00612      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.855         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 208           |
|    time_elapsed         | 8434          |
|    total_timesteps      | 1913600       |
| train/                  |               |
|    approx_kl            | 0.00017811674 |
|    clip_fraction        | 0.00535       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.11         |
|    explained_variance   | 9.72e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000147      |
|    n_updates            | 2070          |
|    policy_gradient_loss | 3.36e-05      |
|    reward               | 0.010153927   |
|    value_loss           | 0.00542       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.856         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 209           |
|    time_elapsed         | 8473          |
|    total_timesteps      | 1922800       |
| train/                  |               |
|    approx_kl            | 0.00036312424 |
|    clip_fraction        | 0.007         |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.111        |
|    explained_variance   | 2.98e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00417      |
|    n_updates            | 2080          |
|    policy_gradient_loss | -0.000684     |
|    reward               | 0.020741899   |
|    value_loss           | 0.00603       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.851         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 210           |
|    time_elapsed         | 8512          |
|    total_timesteps      | 1932000       |
| train/                  |               |
|    approx_kl            | 0.00023181667 |
|    clip_fraction        | 0.00347       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | 8.17e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000311     |
|    n_updates            | 2090          |
|    policy_gradient_loss | -3.02e-05     |
|    reward               | 0.0007309358  |
|    value_loss           | 0.00547       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.852         |
| time/                   |               |
|    fps                  | 226           |
|    iterations           | 211           |
|    time_elapsed         | 8552          |
|    total_timesteps      | 1941200       |
| train/                  |               |
|    approx_kl            | 0.00038481175 |
|    clip_fraction        | 0.00571       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | 4.53e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00507       |
|    n_updates            | 2100          |
|    policy_gradient_loss | 4.46e-05      |
|    reward               | -0.01705996   |
|    value_loss           | 0.00558       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 226          |
|    iterations           | 212          |
|    time_elapsed         | 8592         |
|    total_timesteps      | 1950400      |
| train/                  |              |
|    approx_kl            | 0.0001923271 |
|    clip_fraction        | 0.00403      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.101       |
|    explained_variance   | 4.53e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00204      |
|    n_updates            | 2110         |
|    policy_gradient_loss | -0.00016     |
|    reward               | 0.005170984  |
|    value_loss           | 0.00655      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+03     |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 226          |
|    iterations           | 213          |
|    time_elapsed         | 8633         |
|    total_timesteps      | 1959600      |
| train/                  |              |
|    approx_kl            | 0.0003835797 |
|    clip_fraction        | 0.00497      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.106       |
|    explained_variance   | -4.65e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | 0.000849     |
|    n_updates            | 2120         |
|    policy_gradient_loss | -0.000316    |
|    reward               | 0.0007273562 |
|    value_loss           | 0.00556      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.85          |
| time/                   |               |
|    fps                  | 227           |
|    iterations           | 214           |
|    time_elapsed         | 8672          |
|    total_timesteps      | 1968800       |
| train/                  |               |
|    approx_kl            | 0.00012647893 |
|    clip_fraction        | 0.00105       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.107        |
|    explained_variance   | 6.56e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00284       |
|    n_updates            | 2130          |
|    policy_gradient_loss | 0.000385      |
|    reward               | 0.0018485195  |
|    value_loss           | 0.00631       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.852         |
| time/                   |               |
|    fps                  | 227           |
|    iterations           | 216           |
|    time_elapsed         | 8752          |
|    total_timesteps      | 1987200       |
| train/                  |               |
|    approx_kl            | 0.00027510425 |
|    clip_fraction        | 0.00484       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.109        |
|    explained_variance   | 1.08e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0014        |
|    n_updates            | 2150          |
|    policy_gradient_loss | 4.4e-05       |
|    reward               | 0.016714098   |
|    value_loss           | 0.00644       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.85          |
| time/                   |               |
|    fps                  | 227           |
|    iterations           | 217           |
|    time_elapsed         | 8791          |
|    total_timesteps      | 1996400       |
| train/                  |               |
|    approx_kl            | 0.00036577502 |
|    clip_fraction        | 0.00396       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.109        |
|    explained_variance   | 3.16e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000144      |
|    n_updates            | 2160          |
|    policy_gradient_loss | -0.000125     |
|    reward               | 0.014841702   |
|    value_loss           | 0.00538       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.13e+03      |
|    ep_rew_mean          | 0.846         |
| time/                   |               |
|    fps                  | 227           |
|    iterations           | 218           |
|    time_elapsed         | 8831          |
|    total_timesteps      | 2005600       |
| train/                  |               |
|    approx_kl            | 0.00040839636 |
|    clip_fraction        | 0.00477       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.1          |
|    explained_variance   | 1.29e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0016        |
|    n_updates            | 2170          |
|    policy_gradient_loss | -0.000271     |
|    reward               | 0.0037693433  |
|    value_loss           | 0.00502       |
-------------------------------------------


Total trained timestep: 2005600


In [None]:
# Invoke the agent's predict() with the selected model and the full
# environment, with rendering switched on.
# NOTE(review): `agent`, `selected_model` and `full_env` come from earlier
# cells; presumably this replays the chosen model over the whole data range -
# confirm against the agent class before relying on the output.
agent.predict(
    selected_model,
    full_env,
    render=True,
)

In [None]:
# Call the agent's plot_multiple() helper with no arguments.
# NOTE(review): presumably this charts the results gathered by earlier
# predict() runs - confirm against the agent class.
agent.plot_multiple()