In [1]:
import base64
import IPython
import imageio

def embed_mp4(filename):
    """Return an HTML ``<video>`` element that plays ``filename`` inline.

    The whole file is read into memory and inlined as a base64 ``data:`` URI,
    so the returned markup carries the complete video payload.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        The ``IPython.display.HTML`` object wrapping a 640x480 player.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left open until garbage collection.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return IPython.display.HTML(tag)

def record_game(model, env, num_episodes=5, video_filename='video.mp4'):
    """Play ``num_episodes`` episodes with ``model`` and record them to a video.

    One frame is written right after each reset and one more after every
    environment step, all into a single 60 fps file at ``video_filename``.
    The summed reward of each episode is printed when that episode ends.

    Args:
        model: Object whose ``predict(obs)`` returns ``(action, state)``.
        env: Environment providing ``reset()``, ``step(action)`` (4-tuple
            result) and ``render('rgb_array')``.
        num_episodes: How many episodes to play back to back.
        video_filename: Output path handed to ``imageio.get_writer``.
    """
    with imageio.get_writer(video_filename, fps=60) as writer:

        def capture_frame():
            # Grab the current screen as an RGB array and append it to the video.
            writer.append_data(env.render('rgb_array'))

        for _episode in range(num_episodes):
            observation = env.reset()
            finished = False
            episode_score = 0
            capture_frame()

            while not finished:
                # The second value returned by predict() is not needed here.
                action, _state = model.predict(observation)
                observation, step_reward, finished, _info = env.step(action)
                episode_score += step_reward
                capture_frame()

            print("score:", episode_score)

In [2]:
import os
import gym
import slimevolleygym
from slimevolleygym import SurvivalRewardEnv

from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

from stable_baselines.ppo1 import PPO1
from stable_baselines import A2C, ACER, ACKTR, DQN, HER, GAIL, TRPO

# Registry mapping a short algorithm key to its stable-baselines model class;
# experiment() looks the class up by this key.
algo = {
    'a2c': A2C,
    'acer': ACER,
    'acktr': ACKTR,
    'dqn': DQN,
    'her': HER,
    'gail': GAIL,
    'trpo': TRPO,
    # PPO1 is imported alongside the others but was missing from the registry,
    # so it could not be selected by name. Appended last to keep the existing
    # key order intact.
    'ppo1': PPO1,
}
# Models built by experiment(), keyed by the same algorithm key, so they stay
# reachable from later cells.
trained_model = {}

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
def experiment(model_str, timesteps=15_000_000):
    """Train one registered algorithm on SlimeVolley-v0 with periodic evaluation.

    The model class is looked up in ``algo``, built with ``MlpPolicy`` and
    stored in ``trained_model[model_str]`` before training starts, so it stays
    reachable even if training is interrupted. Logs, the callback's outputs
    and the final model all go under the directory ``"sb-" + model_str``.

    Args:
        model_str: Key into ``algo`` selecting the algorithm (e.g. ``'acktr'``).
        timesteps: Training budget; converted with ``int()`` before use.

    Raises:
        KeyError: If ``model_str`` is not a key of ``algo``.

    NOTE(review): every class is constructed as ``cls(MlpPolicy, env, verbose=2)``.
    DQN, HER and GAIL appear to need a different policy class or extra
    constructor arguments in stable-baselines - confirm before selecting them.
    """
    # Fail before touching the logger or creating environments.
    if model_str not in algo:
        raise KeyError(
            "unknown algorithm {!r}; expected one of {}".format(model_str, sorted(algo)))

    NUM_TIMESTEPS = int(timesteps)
    SEED = 721
    EVAL_FREQ = 25000
    EVAL_EPISODES = 10  # was 1000
    LOGDIR = "sb-" + model_str

    logger.configure(folder=LOGDIR)
    env = gym.make("SlimeVolley-v0")
    # Evaluation gets its own environment: sharing the training env would let
    # evaluation episodes reset it in the middle of a training rollout.
    eval_env = gym.make("SlimeVolley-v0")
    try:
        env.seed(SEED)
        eval_env.seed(SEED)

        model = algo[model_str](MlpPolicy, env, verbose=2)
        trained_model[model_str] = model
        eval_callback = EvalCallback(
            eval_env,
            best_model_save_path=LOGDIR,
            log_path=LOGDIR,
            eval_freq=EVAL_FREQ,
            n_eval_episodes=EVAL_EPISODES,
        )
        model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)
        # Only reached when the full budget completes without interruption.
        model.save(os.path.join(LOGDIR, "final_model"))
    finally:
        # Release both environments even when training is interrupted or fails.
        eval_env.close()
        env.close()

In [6]:
# Train ACKTR using experiment()'s default budget of 15,000,000 timesteps.
experiment('acktr')

Logging to sb-acktr
Wrapping the env in a DummyVecEnv.
updating 34 eigenvalue/vectors
projecting 12 gradient matrices
----------------------------------
| explained_variance | -1.54e+03 |
| fps                | 29        |
| nupdates           | 1         |
| policy_entropy     | 2.08      |
| policy_loss        | 0.291     |
| total_timesteps    | 20        |
| value_loss         | 0.0336    |
----------------------------------
---------------------------------
| explained_variance | -0.908   |
| fps                | 712      |
| nupdates           | 100      |
| policy_entropy     | 2.07     |
| policy_loss        | 0.118    |
| total_timesteps    | 2000     |
| value_loss         | 0.00883  |
---------------------------------
---------------------------------
| explained_variance | 0.75     |
| fps                | 795      |
| nupdates           | 200      |
| policy_entropy     | 2        |
| policy_loss        | 0.19     |
| total_timesteps    | 4000     |
| value_loss         | 

---------------------------------
| explained_variance | 0.292    |
| fps                | 785      |
| nupdates           | 2600     |
| policy_entropy     | 1.88     |
| policy_loss        | -0.86    |
| total_timesteps    | 52000    |
| value_loss         | 0.329    |
---------------------------------
---------------------------------
| explained_variance | -1.06    |
| fps                | 790      |
| nupdates           | 2700     |
| policy_entropy     | 2.06     |
| policy_loss        | -0.111   |
| total_timesteps    | 54000    |
| value_loss         | 0.00266  |
---------------------------------
---------------------------------
| explained_variance | -0.234   |
| fps                | 793      |
| nupdates           | 2800     |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0586   |
| total_timesteps    | 56000    |
| value_loss         | 0.00235  |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.932    |
| fps                | 811      |
| nupdates           | 5300     |
| policy_entropy     | 2.06     |
| policy_loss        | -0.084   |
| total_timesteps    | 106000   |
| value_loss         | 0.00107  |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 813      |
| nupdates           | 5400     |
| policy_entropy     | 2.04     |
| policy_loss        | 6.21e-05 |
| total_timesteps    | 108000   |
| value_loss         | 0.000158 |
---------------------------------
---------------------------------
| explained_variance | -287     |
| fps                | 816      |
| nupdates           | 5500     |
| policy_entropy     | 2.06     |
| policy_loss        | 0.326    |
| total_timesteps    | 110000   |
| value_loss         | 0.0907   |
---------------------------------
---------------------------------
| explained_variance | 0.499    |
| fps         

---------------------------------
| explained_variance | -1.08    |
| fps                | 816      |
| nupdates           | 8000     |
| policy_entropy     | 2.05     |
| policy_loss        | -0.141   |
| total_timesteps    | 160000   |
| value_loss         | 0.00744  |
---------------------------------
---------------------------------
| explained_variance | -1       |
| fps                | 817      |
| nupdates           | 8100     |
| policy_entropy     | 2.05     |
| policy_loss        | 0.139    |
| total_timesteps    | 162000   |
| value_loss         | 0.00824  |
---------------------------------
---------------------------------
| explained_variance | -2.84    |
| fps                | 819      |
| nupdates           | 8200     |
| policy_entropy     | 1.97     |
| policy_loss        | -0.174   |
| total_timesteps    | 164000   |
| value_loss         | 0.00813  |
---------------------------------
---------------------------------
| explained_variance | -1.4     |
| fps         

---------------------------------
| explained_variance | 0.862    |
| fps                | 821      |
| nupdates           | 10700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0781  |
| total_timesteps    | 214000   |
| value_loss         | 0.00108  |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps                | 821      |
| nupdates           | 10800    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00759 |
| total_timesteps    | 216000   |
| value_loss         | 0.000703 |
---------------------------------
---------------------------------
| explained_variance | -4.2e+03 |
| fps                | 822      |
| nupdates           | 10900    |
| policy_entropy     | 2.06     |
| policy_loss        | 0.831    |
| total_timesteps    | 218000   |
| value_loss         | 0.173    |
---------------------------------
---------------------------------
| explained_variance | 0.836    |
| fps         

---------------------------------
| explained_variance | 0.66     |
| fps                | 826      |
| nupdates           | 13400    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0369   |
| total_timesteps    | 268000   |
| value_loss         | 0.0417   |
---------------------------------
---------------------------------
| explained_variance | 0.219    |
| fps                | 826      |
| nupdates           | 13500    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0516  |
| total_timesteps    | 270000   |
| value_loss         | 0.00307  |
---------------------------------
---------------------------------
| explained_variance | 0.76     |
| fps                | 826      |
| nupdates           | 13600    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0621  |
| total_timesteps    | 272000   |
| value_loss         | 0.000718 |
---------------------------------
---------------------------------
| explained_variance | 0.693    |
| fps         

---------------------------------
| explained_variance | 0.996    |
| fps                | 816      |
| nupdates           | 16100    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00894 |
| total_timesteps    | 322000   |
| value_loss         | 0.000435 |
---------------------------------
---------------------------------
| explained_variance | 0.546    |
| fps                | 815      |
| nupdates           | 16200    |
| policy_entropy     | 2.03     |
| policy_loss        | -0.607   |
| total_timesteps    | 324000   |
| value_loss         | 0.164    |
---------------------------------
Eval num_timesteps=325000, episode_reward=-5.00 +/- 0.00
Episode length: 541.20 +/- 71.57
---------------------------------
| explained_variance | 0.623    |
| fps                | 809      |
| nupdates           | 16300    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.225   |
| total_timesteps    | 326000   |
| value_loss         | 0.0854   |
--------------------------

Eval num_timesteps=375000, episode_reward=-4.90 +/- 0.30
Episode length: 589.30 +/- 91.35
---------------------------------
| explained_variance | 0.374    |
| fps                | 803      |
| nupdates           | 18800    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0763  |
| total_timesteps    | 376000   |
| value_loss         | 0.00127  |
---------------------------------
---------------------------------
| explained_variance | -0.941   |
| fps                | 804      |
| nupdates           | 18900    |
| policy_entropy     | 2.04     |
| policy_loss        | 0.107    |
| total_timesteps    | 378000   |
| value_loss         | 0.00967  |
---------------------------------
---------------------------------
| explained_variance | 0.824    |
| fps                | 804      |
| nupdates           | 19000    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0319  |
| total_timesteps    | 380000   |
| value_loss         | 0.000162 |
--------------------------

---------------------------------
| explained_variance | 0.477    |
| fps                | 805      |
| nupdates           | 21400    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.713   |
| total_timesteps    | 428000   |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.43     |
| fps                | 806      |
| nupdates           | 21500    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.258   |
| total_timesteps    | 430000   |
| value_loss         | 0.014    |
---------------------------------
---------------------------------
| explained_variance | 0.92     |
| fps                | 807      |
| nupdates           | 21600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.066    |
| total_timesteps    | 432000   |
| value_loss         | 0.00203  |
---------------------------------
---------------------------------
| explained_variance | -0.94    |
| fps         

---------------------------------
| explained_variance | -1.89    |
| fps                | 809      |
| nupdates           | 24100    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0817  |
| total_timesteps    | 482000   |
| value_loss         | 0.00358  |
---------------------------------
---------------------------------
| explained_variance | 0.926    |
| fps                | 809      |
| nupdates           | 24200    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0592  |
| total_timesteps    | 484000   |
| value_loss         | 0.000527 |
---------------------------------
---------------------------------
| explained_variance | -0.139   |
| fps                | 810      |
| nupdates           | 24300    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.112   |
| total_timesteps    | 486000   |
| value_loss         | 0.00456  |
---------------------------------
---------------------------------
| explained_variance | 0.624    |
| fps         

---------------------------------
| explained_variance | 0.425    |
| fps                | 811      |
| nupdates           | 26800    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00366  |
| total_timesteps    | 536000   |
| value_loss         | 0.00166  |
---------------------------------
---------------------------------
| explained_variance | 0.697    |
| fps                | 812      |
| nupdates           | 26900    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.094   |
| total_timesteps    | 538000   |
| value_loss         | 0.00166  |
---------------------------------
---------------------------------
| explained_variance | 0.896    |
| fps                | 812      |
| nupdates           | 27000    |
| policy_entropy     | 1.97     |
| policy_loss        | 0.0889   |
| total_timesteps    | 540000   |
| value_loss         | 0.00324  |
---------------------------------
---------------------------------
| explained_variance | 0.701    |
| fps         

---------------------------------
| explained_variance | 0.75     |
| fps                | 815      |
| nupdates           | 29500    |
| policy_entropy     | 2.04     |
| policy_loss        | -0.326   |
| total_timesteps    | 590000   |
| value_loss         | 0.0731   |
---------------------------------
---------------------------------
| explained_variance | 0.641    |
| fps                | 816      |
| nupdates           | 29600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0322   |
| total_timesteps    | 592000   |
| value_loss         | 0.0405   |
---------------------------------
---------------------------------
| explained_variance | -24.8    |
| fps                | 816      |
| nupdates           | 29700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0337  |
| total_timesteps    | 594000   |
| value_loss         | 0.0118   |
---------------------------------
---------------------------------
| explained_variance | 0.931    |
| fps         

---------------------------------
| explained_variance | 0.962    |
| fps                | 816      |
| nupdates           | 32200    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0109   |
| total_timesteps    | 644000   |
| value_loss         | 0.000371 |
---------------------------------
---------------------------------
| explained_variance | -4.32    |
| fps                | 816      |
| nupdates           | 32300    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.223    |
| total_timesteps    | 646000   |
| value_loss         | 0.016    |
---------------------------------
---------------------------------
| explained_variance | 0.92     |
| fps                | 817      |
| nupdates           | 32400    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0132  |
| total_timesteps    | 648000   |
| value_loss         | 3.19e-05 |
---------------------------------
Eval num_timesteps=650000, episode_reward=-4.90 +/- 0.30
Episode length: 605.60 +/

---------------------------------
| explained_variance | 0.504    |
| fps                | 818      |
| nupdates           | 34900    |
| policy_entropy     | 2.02     |
| policy_loss        | -0.0167  |
| total_timesteps    | 698000   |
| value_loss         | 0.0399   |
---------------------------------
Eval num_timesteps=700000, episode_reward=-5.00 +/- 0.00
Episode length: 607.10 +/- 82.20
---------------------------------
| explained_variance | -0.768   |
| fps                | 814      |
| nupdates           | 35000    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.102   |
| total_timesteps    | 700000   |
| value_loss         | 0.00405  |
---------------------------------
---------------------------------
| explained_variance | 0.844    |
| fps                | 815      |
| nupdates           | 35100    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0615  |
| total_timesteps    | 702000   |
| value_loss         | 0.000609 |
--------------------------

---------------------------------
| explained_variance | 0.0235   |
| fps                | 815      |
| nupdates           | 37600    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.707   |
| total_timesteps    | 752000   |
| value_loss         | 0.144    |
---------------------------------
---------------------------------
| explained_variance | -85.2    |
| fps                | 815      |
| nupdates           | 37700    |
| policy_entropy     | 2.05     |
| policy_loss        | 1.31     |
| total_timesteps    | 754000   |
| value_loss         | 0.438    |
---------------------------------
---------------------------------
| explained_variance | -0.255   |
| fps                | 816      |
| nupdates           | 37800    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.18    |
| total_timesteps    | 756000   |
| value_loss         | 0.00952  |
---------------------------------
----------------------------------
| explained_variance | 0.848     |
| fps       

---------------------------------
| explained_variance | 0.981    |
| fps                | 815      |
| nupdates           | 40300    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0161   |
| total_timesteps    | 806000   |
| value_loss         | 0.000361 |
---------------------------------
---------------------------------
| explained_variance | 0.721    |
| fps                | 815      |
| nupdates           | 40400    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.08    |
| total_timesteps    | 808000   |
| value_loss         | 0.00126  |
---------------------------------
---------------------------------
| explained_variance | 0.495    |
| fps                | 816      |
| nupdates           | 40500    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.473   |
| total_timesteps    | 810000   |
| value_loss         | 0.0995   |
---------------------------------
---------------------------------
| explained_variance | -0.617   |
| fps         

---------------------------------
| explained_variance | 0.846    |
| fps                | 810      |
| nupdates           | 43000    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0735  |
| total_timesteps    | 860000   |
| value_loss         | 0.000946 |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 810      |
| nupdates           | 43100    |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0594   |
| total_timesteps    | 862000   |
| value_loss         | 0.00145  |
---------------------------------
---------------------------------
| explained_variance | 0.785    |
| fps                | 810      |
| nupdates           | 43200    |
| policy_entropy     | 1.84     |
| policy_loss        | -0.0239  |
| total_timesteps    | 864000   |
| value_loss         | 0.0406   |
---------------------------------
---------------------------------
| explained_variance | 0.564    |
| fps         

---------------------------------
| explained_variance | 0.923    |
| fps                | 804      |
| nupdates           | 45700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0123  |
| total_timesteps    | 914000   |
| value_loss         | 7.97e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.82     |
| fps                | 804      |
| nupdates           | 45800    |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0269   |
| total_timesteps    | 916000   |
| value_loss         | 0.000591 |
---------------------------------
---------------------------------
| explained_variance | 0.722    |
| fps                | 804      |
| nupdates           | 45900    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.141   |
| total_timesteps    | 918000   |
| value_loss         | 0.0042   |
---------------------------------
---------------------------------
| explained_variance | 0.135    |
| fps         

---------------------------------
| explained_variance | 0.864    |
| fps                | 800      |
| nupdates           | 48400    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.127    |
| total_timesteps    | 968000   |
| value_loss         | 0.00545  |
---------------------------------
---------------------------------
| explained_variance | -0.0574  |
| fps                | 800      |
| nupdates           | 48500    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0588  |
| total_timesteps    | 970000   |
| value_loss         | 0.000966 |
---------------------------------
---------------------------------
| explained_variance | -0.128   |
| fps                | 800      |
| nupdates           | 48600    |
| policy_entropy     | 2.03     |
| policy_loss        | -0.0643  |
| total_timesteps    | 972000   |
| value_loss         | 0.00307  |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.961    |
| fps                | 796      |
| nupdates           | 51100    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0435  |
| total_timesteps    | 1022000  |
| value_loss         | 0.0002   |
---------------------------------
---------------------------------
| explained_variance | -0.728   |
| fps                | 796      |
| nupdates           | 51200    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.139   |
| total_timesteps    | 1024000  |
| value_loss         | 0.0104   |
---------------------------------
Eval num_timesteps=1025000, episode_reward=-5.00 +/- 0.00
Episode length: 626.90 +/- 114.17
---------------------------------
| explained_variance | 0.885    |
| fps                | 794      |
| nupdates           | 51300    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0246   |
| total_timesteps    | 1026000  |
| value_loss         | 0.000599 |
------------------------

Eval num_timesteps=1075000, episode_reward=-4.80 +/- 0.40
Episode length: 588.20 +/- 90.15
---------------------------------
| explained_variance | 0.994    |
| fps                | 791      |
| nupdates           | 53800    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00215  |
| total_timesteps    | 1076000  |
| value_loss         | 0.000133 |
---------------------------------
---------------------------------
| explained_variance | -9.4     |
| fps                | 791      |
| nupdates           | 53900    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.126    |
| total_timesteps    | 1078000  |
| value_loss         | 0.022    |
---------------------------------
---------------------------------
| explained_variance | 0.039    |
| fps                | 791      |
| nupdates           | 54000    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.162   |
| total_timesteps    | 1080000  |
| value_loss         | 0.00665  |
-------------------------

---------------------------------
| explained_variance | 0.632    |
| fps                | 788      |
| nupdates           | 56400    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0891   |
| total_timesteps    | 1128000  |
| value_loss         | 0.00346  |
---------------------------------
---------------------------------
| explained_variance | 0.254    |
| fps                | 788      |
| nupdates           | 56500    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0751  |
| total_timesteps    | 1130000  |
| value_loss         | 0.000979 |
---------------------------------
---------------------------------
| explained_variance | -0.11    |
| fps                | 788      |
| nupdates           | 56600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0774   |
| total_timesteps    | 1132000  |
| value_loss         | 0.00341  |
---------------------------------
---------------------------------
| explained_variance | 0.821    |
| fps         

---------------------------------
| explained_variance | 0.46     |
| fps                | 786      |
| nupdates           | 59100    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0203   |
| total_timesteps    | 1182000  |
| value_loss         | 0.000508 |
---------------------------------
---------------------------------
| explained_variance | -1.67    |
| fps                | 786      |
| nupdates           | 59200    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.171    |
| total_timesteps    | 1184000  |
| value_loss         | 0.012    |
---------------------------------
---------------------------------
| explained_variance | -4.23    |
| fps                | 786      |
| nupdates           | 59300    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0142  |
| total_timesteps    | 1186000  |
| value_loss         | 0.00163  |
---------------------------------
---------------------------------
| explained_variance | -0.0919  |
| fps         

---------------------------------
| explained_variance | -176     |
| fps                | 783      |
| nupdates           | 61800    |
| policy_entropy     | 2.06     |
| policy_loss        | 0.673    |
| total_timesteps    | 1236000  |
| value_loss         | 0.208    |
---------------------------------
---------------------------------
| explained_variance | -1.39    |
| fps                | 783      |
| nupdates           | 61900    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.101    |
| total_timesteps    | 1238000  |
| value_loss         | 0.00654  |
---------------------------------
---------------------------------
| explained_variance | -0.42    |
| fps                | 783      |
| nupdates           | 62000    |
| policy_entropy     | 2.06     |
| policy_loss        | 0.071    |
| total_timesteps    | 1240000  |
| value_loss         | 0.00597  |
---------------------------------
---------------------------------
| explained_variance | 0.13     |
| fps         

---------------------------------
| explained_variance | -2.36    |
| fps                | 781      |
| nupdates           | 64500    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.287   |
| total_timesteps    | 1290000  |
| value_loss         | 0.0244   |
---------------------------------
---------------------------------
| explained_variance | 0.6      |
| fps                | 781      |
| nupdates           | 64600    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00752  |
| total_timesteps    | 1292000  |
| value_loss         | 0.000291 |
---------------------------------
---------------------------------
| explained_variance | 0.137    |
| fps                | 781      |
| nupdates           | 64700    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.103    |
| total_timesteps    | 1294000  |
| value_loss         | 0.00441  |
---------------------------------
---------------------------------
| explained_variance | 0.794    |
| fps         

---------------------------------
| explained_variance | 0.173    |
| fps                | 778      |
| nupdates           | 67200    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0767  |
| total_timesteps    | 1344000  |
| value_loss         | 0.00183  |
---------------------------------
---------------------------------
| explained_variance | 0.0997   |
| fps                | 778      |
| nupdates           | 67300    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0153   |
| total_timesteps    | 1346000  |
| value_loss         | 0.00285  |
---------------------------------
---------------------------------
| explained_variance | 0.0726   |
| fps                | 778      |
| nupdates           | 67400    |
| policy_entropy     | 2.03     |
| policy_loss        | -0.167   |
| total_timesteps    | 1348000  |
| value_loss         | 0.00801  |
---------------------------------
Eval num_timesteps=1350000, episode_reward=-4.90 +/- 0.30
Episode length: 585.40 +

---------------------------------
| explained_variance | 0.743    |
| fps                | 776      |
| nupdates           | 69900    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0474   |
| total_timesteps    | 1398000  |
| value_loss         | 0.00154  |
---------------------------------
Eval num_timesteps=1400000, episode_reward=-5.00 +/- 0.00
Episode length: 610.60 +/- 115.64
---------------------------------
| explained_variance | 0.669    |
| fps                | 774      |
| nupdates           | 70000    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.087    |
| total_timesteps    | 1400000  |
| value_loss         | 0.00335  |
---------------------------------
---------------------------------
| explained_variance | 0.59     |
| fps                | 774      |
| nupdates           | 70100    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00363 |
| total_timesteps    | 1402000  |
| value_loss         | 0.000564 |
------------------------

---------------------------------
| explained_variance | 0.573    |
| fps                | 773      |
| nupdates           | 72600    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0805  |
| total_timesteps    | 1452000  |
| value_loss         | 0.00174  |
---------------------------------
---------------------------------
| explained_variance | 0.292    |
| fps                | 773      |
| nupdates           | 72700    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.083   |
| total_timesteps    | 1454000  |
| value_loss         | 0.00147  |
---------------------------------
---------------------------------
| explained_variance | -13.1    |
| fps                | 773      |
| nupdates           | 72800    |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0961   |
| total_timesteps    | 1456000  |
| value_loss         | 0.0135   |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps         

---------------------------------
| explained_variance | -1.56    |
| fps                | 770      |
| nupdates           | 75300    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.129    |
| total_timesteps    | 1506000  |
| value_loss         | 0.00646  |
---------------------------------
---------------------------------
| explained_variance | 0.629    |
| fps                | 770      |
| nupdates           | 75400    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0525  |
| total_timesteps    | 1508000  |
| value_loss         | 0.000505 |
---------------------------------
---------------------------------
| explained_variance | -0.956   |
| fps                | 770      |
| nupdates           | 75500    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.226   |
| total_timesteps    | 1510000  |
| value_loss         | 0.0141   |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps         

---------------------------------
| explained_variance | 0.81     |
| fps                | 765      |
| nupdates           | 78000    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0946   |
| total_timesteps    | 1560000  |
| value_loss         | 0.00378  |
---------------------------------
---------------------------------
| explained_variance | 0.882    |
| fps                | 765      |
| nupdates           | 78100    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0527  |
| total_timesteps    | 1562000  |
| value_loss         | 0.000589 |
---------------------------------
----------------------------------
| explained_variance | -0.653    |
| fps                | 765       |
| nupdates           | 78200     |
| policy_entropy     | 2.05      |
| policy_loss        | -0.000619 |
| total_timesteps    | 1564000   |
| value_loss         | 0.00109   |
----------------------------------
---------------------------------
| explained_variance | 0.908    |
| fps

---------------------------------
| explained_variance | 0.0206   |
| fps                | 763      |
| nupdates           | 80700    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.071    |
| total_timesteps    | 1614000  |
| value_loss         | 0.00245  |
---------------------------------
---------------------------------
| explained_variance | 0.0983   |
| fps                | 763      |
| nupdates           | 80800    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.112    |
| total_timesteps    | 1616000  |
| value_loss         | 0.00537  |
---------------------------------
---------------------------------
| explained_variance | 0.58     |
| fps                | 764      |
| nupdates           | 80900    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0207   |
| total_timesteps    | 1618000  |
| value_loss         | 0.00169  |
---------------------------------
---------------------------------
| explained_variance | 0.652    |
| fps         

---------------------------------
| explained_variance | 0.951    |
| fps                | 761      |
| nupdates           | 83400    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.237   |
| total_timesteps    | 1668000  |
| value_loss         | 0.0123   |
---------------------------------
---------------------------------
| explained_variance | 0.58     |
| fps                | 761      |
| nupdates           | 83500    |
| policy_entropy     | 2.05     |
| policy_loss        | -0.275   |
| total_timesteps    | 1670000  |
| value_loss         | 0.0411   |
---------------------------------
---------------------------------
| explained_variance | 0.337    |
| fps                | 761      |
| nupdates           | 83600    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.116   |
| total_timesteps    | 1672000  |
| value_loss         | 0.00311  |
---------------------------------
---------------------------------
| explained_variance | 0.645    |
| fps         

---------------------------------
| explained_variance | 0.644    |
| fps                | 761      |
| nupdates           | 86100    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.465   |
| total_timesteps    | 1722000  |
| value_loss         | 0.0984   |
---------------------------------
---------------------------------
| explained_variance | 0.584    |
| fps                | 761      |
| nupdates           | 86200    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0657  |
| total_timesteps    | 1724000  |
| value_loss         | 0.000933 |
---------------------------------
Eval num_timesteps=1725000, episode_reward=-4.90 +/- 0.30
Episode length: 561.30 +/- 88.64
---------------------------------
| explained_variance | 0.802    |
| fps                | 760      |
| nupdates           | 86300    |
| policy_entropy     | 2.04     |
| policy_loss        | -0.157   |
| total_timesteps    | 1726000  |
| value_loss         | 0.00508  |
-------------------------

Eval num_timesteps=1775000, episode_reward=-5.00 +/- 0.00
Episode length: 602.60 +/- 105.65
---------------------------------
| explained_variance | -0.0696  |
| fps                | 761      |
| nupdates           | 88800    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.025    |
| total_timesteps    | 1776000  |
| value_loss         | 0.000787 |
---------------------------------
---------------------------------
| explained_variance | 0.667    |
| fps                | 761      |
| nupdates           | 88900    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.168   |
| total_timesteps    | 1778000  |
| value_loss         | 0.0636   |
---------------------------------
---------------------------------
| explained_variance | 0.112    |
| fps                | 761      |
| nupdates           | 89000    |
| policy_entropy     | 2.02     |
| policy_loss        | 0.0549   |
| total_timesteps    | 1780000  |
| value_loss         | 0.00229  |
------------------------

---------------------------------
| explained_variance | 0.66     |
| fps                | 758      |
| nupdates           | 91400    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0355   |
| total_timesteps    | 1828000  |
| value_loss         | 0.00114  |
---------------------------------
---------------------------------
| explained_variance | 0.485    |
| fps                | 758      |
| nupdates           | 91500    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.737   |
| total_timesteps    | 1830000  |
| value_loss         | 0.178    |
---------------------------------
---------------------------------
| explained_variance | 0.619    |
| fps                | 758      |
| nupdates           | 91600    |
| policy_entropy     | 2.08     |
| policy_loss        | -0.141   |
| total_timesteps    | 1832000  |
| value_loss         | 0.00421  |
---------------------------------
---------------------------------
| explained_variance | -0.487   |
| fps         

---------------------------------
| explained_variance | 0.882    |
| fps                | 756      |
| nupdates           | 94100    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0358  |
| total_timesteps    | 1882000  |
| value_loss         | 0.000104 |
---------------------------------
---------------------------------
| explained_variance | 0.719    |
| fps                | 756      |
| nupdates           | 94200    |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0901   |
| total_timesteps    | 1884000  |
| value_loss         | 0.0456   |
---------------------------------
---------------------------------
| explained_variance | 0.893    |
| fps                | 756      |
| nupdates           | 94300    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0343  |
| total_timesteps    | 1886000  |
| value_loss         | 0.000283 |
---------------------------------
---------------------------------
| explained_variance | 0.93     |
| fps         

---------------------------------
| explained_variance | 0.29     |
| fps                | 755      |
| nupdates           | 96800    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.112    |
| total_timesteps    | 1936000  |
| value_loss         | 0.0356   |
---------------------------------
---------------------------------
| explained_variance | 0.57     |
| fps                | 755      |
| nupdates           | 96900    |
| policy_entropy     | 2.04     |
| policy_loss        | -0.138   |
| total_timesteps    | 1938000  |
| value_loss         | 0.0739   |
---------------------------------
---------------------------------
| explained_variance | 0.881    |
| fps                | 755      |
| nupdates           | 97000    |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0814  |
| total_timesteps    | 1940000  |
| value_loss         | 0.00117  |
---------------------------------
---------------------------------
| explained_variance | 0.816    |
| fps         

---------------------------------
| explained_variance | 0.0265   |
| fps                | 754      |
| nupdates           | 99500    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0954  |
| total_timesteps    | 1990000  |
| value_loss         | 0.00307  |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 754      |
| nupdates           | 99600    |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0281  |
| total_timesteps    | 1992000  |
| value_loss         | 0.000122 |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 754      |
| nupdates           | 99700    |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0137   |
| total_timesteps    | 1994000  |
| value_loss         | 0.000322 |
---------------------------------
---------------------------------
| explained_variance | 0.524    |
| fps         

---------------------------------
| explained_variance | 0.857    |
| fps                | 752      |
| nupdates           | 102200   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0382  |
| total_timesteps    | 2044000  |
| value_loss         | 0.000462 |
---------------------------------
---------------------------------
| explained_variance | 0.854    |
| fps                | 752      |
| nupdates           | 102300   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0427  |
| total_timesteps    | 2046000  |
| value_loss         | 0.000367 |
---------------------------------
---------------------------------
| explained_variance | -0.168   |
| fps                | 752      |
| nupdates           | 102400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.222   |
| total_timesteps    | 2048000  |
| value_loss         | 0.013    |
---------------------------------
Eval num_timesteps=2050000, episode_reward=-4.80 +/- 0.40
Episode length: 594.50 +

---------------------------------
| explained_variance | -11.2    |
| fps                | 751      |
| nupdates           | 104900   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.727    |
| total_timesteps    | 2098000  |
| value_loss         | 0.145    |
---------------------------------
Eval num_timesteps=2100000, episode_reward=-4.90 +/- 0.30
Episode length: 583.60 +/- 142.01
---------------------------------
| explained_variance | 0.591    |
| fps                | 750      |
| nupdates           | 105000   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0212   |
| total_timesteps    | 2100000  |
| value_loss         | 0.00053  |
---------------------------------
---------------------------------
| explained_variance | 0.867    |
| fps                | 750      |
| nupdates           | 105100   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00959  |
| total_timesteps    | 2102000  |
| value_loss         | 0.000421 |
------------------------

---------------------------------
| explained_variance | 0.395    |
| fps                | 748      |
| nupdates           | 107600   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0801  |
| total_timesteps    | 2152000  |
| value_loss         | 0.00139  |
---------------------------------
---------------------------------
| explained_variance | -75      |
| fps                | 748      |
| nupdates           | 107700   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0711   |
| total_timesteps    | 2154000  |
| value_loss         | 0.0672   |
---------------------------------
---------------------------------
| explained_variance | 0.505    |
| fps                | 748      |
| nupdates           | 107800   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.125   |
| total_timesteps    | 2156000  |
| value_loss         | 0.00345  |
---------------------------------
---------------------------------
| explained_variance | 0.587    |
| fps         

---------------------------------
| explained_variance | 0.873    |
| fps                | 747      |
| nupdates           | 110300   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0448   |
| total_timesteps    | 2206000  |
| value_loss         | 0.00115  |
---------------------------------
---------------------------------
| explained_variance | 0.805    |
| fps                | 747      |
| nupdates           | 110400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0929  |
| total_timesteps    | 2208000  |
| value_loss         | 0.043    |
---------------------------------
---------------------------------
| explained_variance | -0.323   |
| fps                | 747      |
| nupdates           | 110500   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.113    |
| total_timesteps    | 2210000  |
| value_loss         | 0.00828  |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.601    |
| fps                | 746      |
| nupdates           | 113000   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0691   |
| total_timesteps    | 2260000  |
| value_loss         | 0.00228  |
---------------------------------
---------------------------------
| explained_variance | -0.0148  |
| fps                | 746      |
| nupdates           | 113100   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.101   |
| total_timesteps    | 2262000  |
| value_loss         | 0.00208  |
---------------------------------
---------------------------------
| explained_variance | 0.498    |
| fps                | 746      |
| nupdates           | 113200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0735   |
| total_timesteps    | 2264000  |
| value_loss         | 0.00284  |
---------------------------------
---------------------------------
| explained_variance | 0.789    |
| fps         

---------------------------------
| explained_variance | 0.622    |
| fps                | 745      |
| nupdates           | 115700   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.026    |
| total_timesteps    | 2314000  |
| value_loss         | 0.000661 |
---------------------------------
---------------------------------
| explained_variance | 0.778    |
| fps                | 745      |
| nupdates           | 115800   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00216  |
| total_timesteps    | 2316000  |
| value_loss         | 0.000163 |
---------------------------------
---------------------------------
| explained_variance | 0.938    |
| fps                | 745      |
| nupdates           | 115900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0447  |
| total_timesteps    | 2318000  |
| value_loss         | 0.000346 |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.558    |
| fps                | 744      |
| nupdates           | 118400   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0526   |
| total_timesteps    | 2368000  |
| value_loss         | 0.00144  |
---------------------------------
---------------------------------
| explained_variance | 0.418    |
| fps                | 744      |
| nupdates           | 118500   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.192   |
| total_timesteps    | 2370000  |
| value_loss         | 0.00764  |
---------------------------------
---------------------------------
| explained_variance | 0.731    |
| fps                | 744      |
| nupdates           | 118600   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0396  |
| total_timesteps    | 2372000  |
| value_loss         | 0.000115 |
---------------------------------
---------------------------------
| explained_variance | 0.6      |
| fps         

---------------------------------
| explained_variance | 0.81     |
| fps                | 743      |
| nupdates           | 121100   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0859  |
| total_timesteps    | 2422000  |
| value_loss         | 0.00147  |
---------------------------------
---------------------------------
| explained_variance | -0.203   |
| fps                | 743      |
| nupdates           | 121200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00888  |
| total_timesteps    | 2424000  |
| value_loss         | 0.0013   |
---------------------------------
Eval num_timesteps=2425000, episode_reward=-5.00 +/- 0.00
Episode length: 585.20 +/- 68.91
---------------------------------
| explained_variance | 0.965    |
| fps                | 742      |
| nupdates           | 121300   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0475   |
| total_timesteps    | 2426000  |
| value_loss         | 0.0012   |
-------------------------

Eval num_timesteps=2475000, episode_reward=-4.80 +/- 0.60
Episode length: 577.30 +/- 138.39
---------------------------------
| explained_variance | 0.788    |
| fps                | 742      |
| nupdates           | 123800   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.0402   |
| total_timesteps    | 2476000  |
| value_loss         | 0.0436   |
---------------------------------
---------------------------------
| explained_variance | 0.939    |
| fps                | 742      |
| nupdates           | 123900   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.0214   |
| total_timesteps    | 2478000  |
| value_loss         | 0.00172  |
---------------------------------
---------------------------------
| explained_variance | -0.936   |
| fps                | 742      |
| nupdates           | 124000   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.182   |
| total_timesteps    | 2480000  |
| value_loss         | 0.00894  |
------------------------

---------------------------------
| explained_variance | 0.181    |
| fps                | 741      |
| nupdates           | 126400   |
| policy_entropy     | 2.02     |
| policy_loss        | 0.00979  |
| total_timesteps    | 2528000  |
| value_loss         | 0.00108  |
---------------------------------
---------------------------------
| explained_variance | -1.08    |
| fps                | 741      |
| nupdates           | 126500   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.059   |
| total_timesteps    | 2530000  |
| value_loss         | 0.0052   |
---------------------------------
---------------------------------
| explained_variance | 0.628    |
| fps                | 741      |
| nupdates           | 126600   |
| policy_entropy     | 2.03     |
| policy_loss        | -0.0302  |
| total_timesteps    | 2532000  |
| value_loss         | 0.000885 |
---------------------------------
---------------------------------
| explained_variance | -0.117   |
| fps         

---------------------------------
| explained_variance | 0.577    |
| fps                | 740      |
| nupdates           | 129100   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0361  |
| total_timesteps    | 2582000  |
| value_loss         | 0.000453 |
---------------------------------
---------------------------------
| explained_variance | -0.463   |
| fps                | 740      |
| nupdates           | 129200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.134    |
| total_timesteps    | 2584000  |
| value_loss         | 0.00725  |
---------------------------------
---------------------------------
| explained_variance | 0.849    |
| fps                | 740      |
| nupdates           | 129300   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0108   |
| total_timesteps    | 2586000  |
| value_loss         | 0.000305 |
---------------------------------
---------------------------------
| explained_variance | 0.404    |
| fps         

---------------------------------
| explained_variance | 0.939    |
| fps                | 739      |
| nupdates           | 131800   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.063    |
| total_timesteps    | 2636000  |
| value_loss         | 0.00188  |
---------------------------------
---------------------------------
| explained_variance | -0.176   |
| fps                | 739      |
| nupdates           | 131900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.019   |
| total_timesteps    | 2638000  |
| value_loss         | 0.000573 |
---------------------------------
---------------------------------
| explained_variance | 0.285    |
| fps                | 739      |
| nupdates           | 132000   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.097    |
| total_timesteps    | 2640000  |
| value_loss         | 0.00478  |
---------------------------------
---------------------------------
| explained_variance | 0.564    |
| fps         

---------------------------------
| explained_variance | 0.818    |
| fps                | 738      |
| nupdates           | 134500   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.113    |
| total_timesteps    | 2690000  |
| value_loss         | 0.0046   |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 738      |
| nupdates           | 134600   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0588  |
| total_timesteps    | 2692000  |
| value_loss         | 0.000443 |
---------------------------------
---------------------------------
| explained_variance | -0.135   |
| fps                | 738      |
| nupdates           | 134700   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.116    |
| total_timesteps    | 2694000  |
| value_loss         | 0.00552  |
---------------------------------
---------------------------------
| explained_variance | 0.542    |
| fps         

---------------------------------
| explained_variance | 0.899    |
| fps                | 737      |
| nupdates           | 137200   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0357  |
| total_timesteps    | 2744000  |
| value_loss         | 0.000352 |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps                | 737      |
| nupdates           | 137300   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.00176 |
| total_timesteps    | 2746000  |
| value_loss         | 0.000141 |
---------------------------------
---------------------------------
| explained_variance | -4.66    |
| fps                | 737      |
| nupdates           | 137400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.25    |
| total_timesteps    | 2748000  |
| value_loss         | 0.0204   |
---------------------------------
Eval num_timesteps=2750000, episode_reward=-4.70 +/- 0.46
Episode length: 546.20 +

---------------------------------
| explained_variance | 0.177    |
| fps                | 737      |
| nupdates           | 139900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.842   |
| total_timesteps    | 2798000  |
| value_loss         | 0.242    |
---------------------------------
Eval num_timesteps=2800000, episode_reward=-4.90 +/- 0.30
Episode length: 575.10 +/- 107.34
---------------------------------
| explained_variance | 0.45     |
| fps                | 736      |
| nupdates           | 140000   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00884  |
| total_timesteps    | 2800000  |
| value_loss         | 0.000678 |
---------------------------------
---------------------------------
| explained_variance | 0.721    |
| fps                | 736      |
| nupdates           | 140100   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0321   |
| total_timesteps    | 2802000  |
| value_loss         | 0.000719 |
------------------------

---------------------------------
| explained_variance | 0.616    |
| fps                | 736      |
| nupdates           | 142600   |
| policy_entropy     | 2        |
| policy_loss        | -0.46    |
| total_timesteps    | 2852000  |
| value_loss         | 0.128    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 737      |
| nupdates           | 142700   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0523  |
| total_timesteps    | 2854000  |
| value_loss         | 0.000289 |
---------------------------------
---------------------------------
| explained_variance | 0.646    |
| fps                | 737      |
| nupdates           | 142800   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0242  |
| total_timesteps    | 2856000  |
| value_loss         | 0.00022  |
---------------------------------
---------------------------------
| explained_variance | -3.35    |
| fps         

---------------------------------
| explained_variance | -2.41    |
| fps                | 736      |
| nupdates           | 145300   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.12     |
| total_timesteps    | 2906000  |
| value_loss         | 0.00659  |
---------------------------------
---------------------------------
| explained_variance | -7.68    |
| fps                | 737      |
| nupdates           | 145400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.274   |
| total_timesteps    | 2908000  |
| value_loss         | 0.0316   |
---------------------------------
---------------------------------
| explained_variance | 0.743    |
| fps                | 737      |
| nupdates           | 145500   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0246  |
| total_timesteps    | 2910000  |
| value_loss         | 9.4e-05  |
---------------------------------
---------------------------------
| explained_variance | 0.909    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 736      |
| nupdates           | 148000   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0253   |
| total_timesteps    | 2960000  |
| value_loss         | 0.000554 |
---------------------------------
---------------------------------
| explained_variance | 0.917    |
| fps                | 737      |
| nupdates           | 148100   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0306  |
| total_timesteps    | 2962000  |
| value_loss         | 0.00027  |
---------------------------------
---------------------------------
| explained_variance | 0.846    |
| fps                | 737      |
| nupdates           | 148200   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.132   |
| total_timesteps    | 2964000  |
| value_loss         | 0.00321  |
---------------------------------
---------------------------------
| explained_variance | 0.821    |
| fps         

---------------------------------
| explained_variance | 0.528    |
| fps                | 737      |
| nupdates           | 150700   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0166  |
| total_timesteps    | 3014000  |
| value_loss         | 0.000578 |
---------------------------------
---------------------------------
| explained_variance | -1.26    |
| fps                | 737      |
| nupdates           | 150800   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.298   |
| total_timesteps    | 3016000  |
| value_loss         | 0.0206   |
---------------------------------
---------------------------------
| explained_variance | 0.507    |
| fps                | 737      |
| nupdates           | 150900   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.00324  |
| total_timesteps    | 3018000  |
| value_loss         | 0.000239 |
---------------------------------
---------------------------------
| explained_variance | -0.429   |
| fps         

---------------------------------
| explained_variance | 0.58     |
| fps                | 737      |
| nupdates           | 153400   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.158    |
| total_timesteps    | 3068000  |
| value_loss         | 0.00961  |
---------------------------------
---------------------------------
| explained_variance | -3.71    |
| fps                | 737      |
| nupdates           | 153500   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.00624 |
| total_timesteps    | 3070000  |
| value_loss         | 0.00477  |
---------------------------------
---------------------------------
| explained_variance | 0.861    |
| fps                | 737      |
| nupdates           | 153600   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0711  |
| total_timesteps    | 3072000  |
| value_loss         | 0.000838 |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps         

---------------------------------
| explained_variance | -0.283   |
| fps                | 737      |
| nupdates           | 156100   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0529  |
| total_timesteps    | 3122000  |
| value_loss         | 0.000454 |
---------------------------------
---------------------------------
| explained_variance | 0.761    |
| fps                | 737      |
| nupdates           | 156200   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.138   |
| total_timesteps    | 3124000  |
| value_loss         | 0.0453   |
---------------------------------
Eval num_timesteps=3125000, episode_reward=-5.00 +/- 0.00
Episode length: 535.80 +/- 89.32
---------------------------------
| explained_variance | 5.57e-05 |
| fps                | 737      |
| nupdates           | 156300   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0196   |
| total_timesteps    | 3126000  |
| value_loss         | 0.00266  |
-------------------------

Eval num_timesteps=3175000, episode_reward=-5.00 +/- 0.00
Episode length: 561.70 +/- 69.02
---------------------------------
| explained_variance | -8.4     |
| fps                | 737      |
| nupdates           | 158800   |
| policy_entropy     | 2.03     |
| policy_loss        | 0.505    |
| total_timesteps    | 3176000  |
| value_loss         | 0.0769   |
---------------------------------
---------------------------------
| explained_variance | 0.5      |
| fps                | 737      |
| nupdates           | 158900   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.197    |
| total_timesteps    | 3178000  |
| value_loss         | 0.0517   |
---------------------------------
---------------------------------
| explained_variance | 0.636    |
| fps                | 737      |
| nupdates           | 159000   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.04     |
| total_timesteps    | 3180000  |
| value_loss         | 0.00136  |
-------------------------

---------------------------------
| explained_variance | 0.817    |
| fps                | 738      |
| nupdates           | 161400   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.111    |
| total_timesteps    | 3228000  |
| value_loss         | 0.00449  |
---------------------------------
---------------------------------
| explained_variance | -1.39    |
| fps                | 738      |
| nupdates           | 161500   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.162   |
| total_timesteps    | 3230000  |
| value_loss         | 0.0087   |
---------------------------------
---------------------------------
| explained_variance | 0.899    |
| fps                | 738      |
| nupdates           | 161600   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0028  |
| total_timesteps    | 3232000  |
| value_loss         | 0.000373 |
---------------------------------
---------------------------------
| explained_variance | -193     |
| fps         

---------------------------------
| explained_variance | 0.769    |
| fps                | 738      |
| nupdates           | 164100   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0293  |
| total_timesteps    | 3282000  |
| value_loss         | 0.000181 |
---------------------------------
---------------------------------
| explained_variance | 0.728    |
| fps                | 738      |
| nupdates           | 164200   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.15     |
| total_timesteps    | 3284000  |
| value_loss         | 0.008    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 738      |
| nupdates           | 164300   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0114   |
| total_timesteps    | 3286000  |
| value_loss         | 0.000278 |
---------------------------------
---------------------------------
| explained_variance | -1.71    |
| fps         

----------------------------------
| explained_variance | 0.944     |
| fps                | 738       |
| nupdates           | 166800    |
| policy_entropy     | 2.07      |
| policy_loss        | -0.000765 |
| total_timesteps    | 3336000   |
| value_loss         | 0.000143  |
----------------------------------
---------------------------------
| explained_variance | 0.492    |
| fps                | 738      |
| nupdates           | 166900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.103   |
| total_timesteps    | 3338000  |
| value_loss         | 0.0027   |
---------------------------------
---------------------------------
| explained_variance | -5.34    |
| fps                | 738      |
| nupdates           | 167000   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.137    |
| total_timesteps    | 3340000  |
| value_loss         | 0.00751  |
---------------------------------
---------------------------------
| explained_variance | 0.445    |
| fps

---------------------------------
| explained_variance | 0.287    |
| fps                | 739      |
| nupdates           | 169500   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.0669   |
| total_timesteps    | 3390000  |
| value_loss         | 0.00205  |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps                | 739      |
| nupdates           | 169600   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.044    |
| total_timesteps    | 3392000  |
| value_loss         | 0.00119  |
---------------------------------
---------------------------------
| explained_variance | 0.776    |
| fps                | 739      |
| nupdates           | 169700   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.07    |
| total_timesteps    | 3394000  |
| value_loss         | 0.00118  |
---------------------------------
---------------------------------
| explained_variance | -0.0414  |
| fps         

---------------------------------
| explained_variance | 0.529    |
| fps                | 739      |
| nupdates           | 172200   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.162   |
| total_timesteps    | 3444000  |
| value_loss         | 0.00547  |
---------------------------------
---------------------------------
| explained_variance | 0.782    |
| fps                | 739      |
| nupdates           | 172300   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0759  |
| total_timesteps    | 3446000  |
| value_loss         | 0.00125  |
---------------------------------
---------------------------------
| explained_variance | 0.812    |
| fps                | 740      |
| nupdates           | 172400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0538  |
| total_timesteps    | 3448000  |
| value_loss         | 0.000463 |
---------------------------------
Eval num_timesteps=3450000, episode_reward=-4.90 +/- 0.30
Episode length: 643.90 +

---------------------------------
| explained_variance | 0.366    |
| fps                | 740      |
| nupdates           | 174900   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0488  |
| total_timesteps    | 3498000  |
| value_loss         | 0.00237  |
---------------------------------
Eval num_timesteps=3500000, episode_reward=-4.80 +/- 0.40
Episode length: 538.60 +/- 83.75
---------------------------------
| explained_variance | 0.765    |
| fps                | 739      |
| nupdates           | 175000   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.067   |
| total_timesteps    | 3500000  |
| value_loss         | 0.000751 |
---------------------------------
---------------------------------
| explained_variance | 0.953    |
| fps                | 739      |
| nupdates           | 175100   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0134  |
| total_timesteps    | 3502000  |
| value_loss         | 0.000114 |
-------------------------

---------------------------------
| explained_variance | 0.565    |
| fps                | 740      |
| nupdates           | 177600   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.109   |
| total_timesteps    | 3552000  |
| value_loss         | 0.00245  |
---------------------------------
---------------------------------
| explained_variance | 0.902    |
| fps                | 740      |
| nupdates           | 177700   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.018   |
| total_timesteps    | 3554000  |
| value_loss         | 8.31e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.605    |
| fps                | 740      |
| nupdates           | 177800   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.045   |
| total_timesteps    | 3556000  |
| value_loss         | 0.000496 |
---------------------------------
---------------------------------
| explained_variance | 0.541    |
| fps         

---------------------------------
| explained_variance | 0.566    |
| fps                | 740      |
| nupdates           | 180300   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.134    |
| total_timesteps    | 3606000  |
| value_loss         | 0.00705  |
---------------------------------
---------------------------------
| explained_variance | 0.765    |
| fps                | 740      |
| nupdates           | 180400   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0412   |
| total_timesteps    | 3608000  |
| value_loss         | 0.00121  |
---------------------------------
---------------------------------
| explained_variance | 0.739    |
| fps                | 740      |
| nupdates           | 180500   |
| policy_entropy     | 2.02     |
| policy_loss        | 0.0545   |
| total_timesteps    | 3610000  |
| value_loss         | 0.00197  |
---------------------------------
---------------------------------
| explained_variance | 0.734    |
| fps         

---------------------------------
| explained_variance | 0.877    |
| fps                | 740      |
| nupdates           | 183000   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.044   |
| total_timesteps    | 3660000  |
| value_loss         | 0.000599 |
---------------------------------
---------------------------------
| explained_variance | 0.561    |
| fps                | 740      |
| nupdates           | 183100   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.428   |
| total_timesteps    | 3662000  |
| value_loss         | 0.125    |
---------------------------------
---------------------------------
| explained_variance | 0.754    |
| fps                | 740      |
| nupdates           | 183200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0694   |
| total_timesteps    | 3664000  |
| value_loss         | 0.00233  |
---------------------------------
---------------------------------
| explained_variance | 0.897    |
| fps         

---------------------------------
| explained_variance | -0.224   |
| fps                | 741      |
| nupdates           | 185700   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0384  |
| total_timesteps    | 3714000  |
| value_loss         | 0.00297  |
---------------------------------
---------------------------------
| explained_variance | 0.804    |
| fps                | 741      |
| nupdates           | 185800   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0604  |
| total_timesteps    | 3716000  |
| value_loss         | 0.000553 |
---------------------------------
---------------------------------
| explained_variance | 0.688    |
| fps                | 741      |
| nupdates           | 185900   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0347   |
| total_timesteps    | 3718000  |
| value_loss         | 0.00197  |
---------------------------------
---------------------------------
| explained_variance | -0.606   |
| fps         

---------------------------------
| explained_variance | 0.26     |
| fps                | 741      |
| nupdates           | 188400   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0504  |
| total_timesteps    | 3768000  |
| value_loss         | 0.0593   |
---------------------------------
---------------------------------
| explained_variance | 0.544    |
| fps                | 741      |
| nupdates           | 188500   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0821   |
| total_timesteps    | 3770000  |
| value_loss         | 0.00265  |
---------------------------------
---------------------------------
| explained_variance | 0.842    |
| fps                | 741      |
| nupdates           | 188600   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0547   |
| total_timesteps    | 3772000  |
| value_loss         | 0.00463  |
---------------------------------
---------------------------------
| explained_variance | 0.461    |
| fps         

---------------------------------
| explained_variance | 0.804    |
| fps                | 741      |
| nupdates           | 191100   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.00914 |
| total_timesteps    | 3822000  |
| value_loss         | 0.000768 |
---------------------------------
---------------------------------
| explained_variance | 0.817    |
| fps                | 742      |
| nupdates           | 191200   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0073  |
| total_timesteps    | 3824000  |
| value_loss         | 0.00057  |
---------------------------------
Eval num_timesteps=3825000, episode_reward=-4.70 +/- 0.46
Episode length: 538.30 +/- 122.43
---------------------------------
| explained_variance | 0.653    |
| fps                | 741      |
| nupdates           | 191300   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.165   |
| total_timesteps    | 3826000  |
| value_loss         | 0.0596   |
------------------------

Eval num_timesteps=3875000, episode_reward=-4.70 +/- 0.46
Episode length: 642.70 +/- 80.90
---------------------------------
| explained_variance | 0.818    |
| fps                | 741      |
| nupdates           | 193800   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0255   |
| total_timesteps    | 3876000  |
| value_loss         | 0.000689 |
---------------------------------
---------------------------------
| explained_variance | -0.383   |
| fps                | 741      |
| nupdates           | 193900   |
| policy_entropy     | 2.02     |
| policy_loss        | 2.16     |
| total_timesteps    | 3878000  |
| value_loss         | 1.45     |
---------------------------------
---------------------------------
| explained_variance | 0.897    |
| fps                | 741      |
| nupdates           | 194000   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0468   |
| total_timesteps    | 3880000  |
| value_loss         | 0.00564  |
-------------------------

---------------------------------
| explained_variance | 0.711    |
| fps                | 741      |
| nupdates           | 196400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.12    |
| total_timesteps    | 3928000  |
| value_loss         | 0.00313  |
---------------------------------
---------------------------------
| explained_variance | 0.708    |
| fps                | 742      |
| nupdates           | 196500   |
| policy_entropy     | 2.02     |
| policy_loss        | 0.191    |
| total_timesteps    | 3930000  |
| value_loss         | 0.0109   |
---------------------------------
---------------------------------
| explained_variance | 0.766    |
| fps                | 742      |
| nupdates           | 196600   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.013    |
| total_timesteps    | 3932000  |
| value_loss         | 0.000557 |
---------------------------------
---------------------------------
| explained_variance | 0.599    |
| fps         

---------------------------------
| explained_variance | 0.349    |
| fps                | 741      |
| nupdates           | 199100   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.14     |
| total_timesteps    | 3982000  |
| value_loss         | 0.0081   |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps                | 741      |
| nupdates           | 199200   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.00941 |
| total_timesteps    | 3984000  |
| value_loss         | 0.000139 |
---------------------------------
---------------------------------
| explained_variance | 0.0875   |
| fps                | 741      |
| nupdates           | 199300   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.118   |
| total_timesteps    | 3986000  |
| value_loss         | 0.00436  |
---------------------------------
---------------------------------
| explained_variance | 0.541    |
| fps         

---------------------------------
| explained_variance | 0.579    |
| fps                | 741      |
| nupdates           | 201800   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0353  |
| total_timesteps    | 4036000  |
| value_loss         | 0.0505   |
---------------------------------
---------------------------------
| explained_variance | 0.928    |
| fps                | 741      |
| nupdates           | 201900   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0684  |
| total_timesteps    | 4038000  |
| value_loss         | 0.000653 |
---------------------------------
---------------------------------
| explained_variance | 0.826    |
| fps                | 741      |
| nupdates           | 202000   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.00607  |
| total_timesteps    | 4040000  |
| value_loss         | 0.000256 |
---------------------------------
---------------------------------
| explained_variance | 0.479    |
| fps         

---------------------------------
| explained_variance | 0.495    |
| fps                | 741      |
| nupdates           | 204500   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.122   |
| total_timesteps    | 4090000  |
| value_loss         | 0.00267  |
---------------------------------
---------------------------------
| explained_variance | 0.693    |
| fps                | 742      |
| nupdates           | 204600   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0501  |
| total_timesteps    | 4092000  |
| value_loss         | 0.000413 |
---------------------------------
---------------------------------
| explained_variance | 0.814    |
| fps                | 742      |
| nupdates           | 204700   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0592   |
| total_timesteps    | 4094000  |
| value_loss         | 0.00219  |
---------------------------------
---------------------------------
| explained_variance | -1.39    |
| fps         

---------------------------------
| explained_variance | 0.286    |
| fps                | 742      |
| nupdates           | 207200   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.062    |
| total_timesteps    | 4144000  |
| value_loss         | 0.00203  |
---------------------------------
---------------------------------
| explained_variance | 0.115    |
| fps                | 742      |
| nupdates           | 207300   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.106   |
| total_timesteps    | 4146000  |
| value_loss         | 0.00333  |
---------------------------------
---------------------------------
| explained_variance | 0.65     |
| fps                | 742      |
| nupdates           | 207400   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0396   |
| total_timesteps    | 4148000  |
| value_loss         | 0.00163  |
---------------------------------
Eval num_timesteps=4150000, episode_reward=-4.80 +/- 0.40
Episode length: 571.90 +

---------------------------------
| explained_variance | -0.157   |
| fps                | 742      |
| nupdates           | 209900   |
| policy_entropy     | 2.03     |
| policy_loss        | 0.0418   |
| total_timesteps    | 4198000  |
| value_loss         | 0.00137  |
---------------------------------
Eval num_timesteps=4200000, episode_reward=-4.70 +/- 0.46
Episode length: 620.80 +/- 58.79
---------------------------------
| explained_variance | 0.449    |
| fps                | 742      |
| nupdates           | 210000   |
| policy_entropy     | 2.03     |
| policy_loss        | 0.0105   |
| total_timesteps    | 4200000  |
| value_loss         | 0.042    |
---------------------------------
---------------------------------
| explained_variance | 0.902    |
| fps                | 742      |
| nupdates           | 210100   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0547  |
| total_timesteps    | 4202000  |
| value_loss         | 0.000524 |
-------------------------

---------------------------------
| explained_variance | 0.445    |
| fps                | 742      |
| nupdates           | 212600   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0242  |
| total_timesteps    | 4252000  |
| value_loss         | 0.000196 |
---------------------------------
---------------------------------
| explained_variance | 0.853    |
| fps                | 742      |
| nupdates           | 212700   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0548   |
| total_timesteps    | 4254000  |
| value_loss         | 0.00186  |
---------------------------------
---------------------------------
| explained_variance | 0.763    |
| fps                | 742      |
| nupdates           | 212800   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0181   |
| total_timesteps    | 4256000  |
| value_loss         | 0.000409 |
---------------------------------
---------------------------------
| explained_variance | 0.264    |
| fps         

---------------------------------
| explained_variance | 0.636    |
| fps                | 742      |
| nupdates           | 215300   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0699   |
| total_timesteps    | 4306000  |
| value_loss         | 0.00236  |
---------------------------------
---------------------------------
| explained_variance | 0.773    |
| fps                | 742      |
| nupdates           | 215400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0193  |
| total_timesteps    | 4308000  |
| value_loss         | 0.000252 |
---------------------------------
---------------------------------
| explained_variance | -0.069   |
| fps                | 742      |
| nupdates           | 215500   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0893  |
| total_timesteps    | 4310000  |
| value_loss         | 0.00332  |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.768    |
| fps                | 743      |
| nupdates           | 218000   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0284  |
| total_timesteps    | 4360000  |
| value_loss         | 0.000866 |
---------------------------------
---------------------------------
| explained_variance | 0.851    |
| fps                | 743      |
| nupdates           | 218100   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.023   |
| total_timesteps    | 4362000  |
| value_loss         | 7.51e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.476    |
| fps                | 743      |
| nupdates           | 218200   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0966   |
| total_timesteps    | 4364000  |
| value_loss         | 0.00376  |
---------------------------------
---------------------------------
| explained_variance | 0.182    |
| fps         

---------------------------------
| explained_variance | 0.616    |
| fps                | 743      |
| nupdates           | 220700   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.112   |
| total_timesteps    | 4414000  |
| value_loss         | 0.00285  |
---------------------------------
---------------------------------
| explained_variance | -5.59    |
| fps                | 743      |
| nupdates           | 220800   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.588    |
| total_timesteps    | 4416000  |
| value_loss         | 0.0841   |
---------------------------------
---------------------------------
| explained_variance | 0.716    |
| fps                | 743      |
| nupdates           | 220900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0862  |
| total_timesteps    | 4418000  |
| value_loss         | 0.00159  |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps         

---------------------------------
| explained_variance | 0.491    |
| fps                | 743      |
| nupdates           | 223400   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.267    |
| total_timesteps    | 4468000  |
| value_loss         | 0.023    |
---------------------------------
---------------------------------
| explained_variance | -68      |
| fps                | 743      |
| nupdates           | 223500   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0508   |
| total_timesteps    | 4470000  |
| value_loss         | 0.0385   |
---------------------------------
---------------------------------
| explained_variance | 0.733    |
| fps                | 743      |
| nupdates           | 223600   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.00869  |
| total_timesteps    | 4472000  |
| value_loss         | 0.000663 |
---------------------------------
---------------------------------
| explained_variance | 0.841    |
| fps         

---------------------------------
| explained_variance | 0.947    |
| fps                | 744      |
| nupdates           | 226100   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.000938 |
| total_timesteps    | 4522000  |
| value_loss         | 0.000244 |
---------------------------------
---------------------------------
| explained_variance | 0.86     |
| fps                | 744      |
| nupdates           | 226200   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.109    |
| total_timesteps    | 4524000  |
| value_loss         | 0.0047   |
---------------------------------
Eval num_timesteps=4525000, episode_reward=-5.00 +/- 0.00
Episode length: 612.10 +/- 95.57
---------------------------------
| explained_variance | 0.946    |
| fps                | 743      |
| nupdates           | 226300   |
| policy_entropy     | 2.03     |
| policy_loss        | -0.0466  |
| total_timesteps    | 4526000  |
| value_loss         | 0.000369 |
-------------------------

Eval num_timesteps=4575000, episode_reward=-4.90 +/- 0.30
Episode length: 546.10 +/- 73.94
---------------------------------
| explained_variance | 0.881    |
| fps                | 744      |
| nupdates           | 228800   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0364  |
| total_timesteps    | 4576000  |
| value_loss         | 0.000357 |
---------------------------------
---------------------------------
| explained_variance | 0.466    |
| fps                | 744      |
| nupdates           | 228900   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0159   |
| total_timesteps    | 4578000  |
| value_loss         | 0.000982 |
---------------------------------
---------------------------------
| explained_variance | -0.395   |
| fps                | 744      |
| nupdates           | 229000   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0669   |
| total_timesteps    | 4580000  |
| value_loss         | 0.00225  |
-------------------------

---------------------------------
| explained_variance | -0.913   |
| fps                | 743      |
| nupdates           | 231400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00529 |
| total_timesteps    | 4628000  |
| value_loss         | 0.000884 |
---------------------------------
---------------------------------
| explained_variance | 0.498    |
| fps                | 743      |
| nupdates           | 231500   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.109    |
| total_timesteps    | 4630000  |
| value_loss         | 0.00423  |
---------------------------------
---------------------------------
| explained_variance | 0.622    |
| fps                | 744      |
| nupdates           | 231600   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.139    |
| total_timesteps    | 4632000  |
| value_loss         | 0.0202   |
---------------------------------
---------------------------------
| explained_variance | 0.362    |
| fps         

---------------------------------
| explained_variance | -20.6    |
| fps                | 744      |
| nupdates           | 234100   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.441    |
| total_timesteps    | 4682000  |
| value_loss         | 0.0928   |
---------------------------------
---------------------------------
| explained_variance | 0.903    |
| fps                | 744      |
| nupdates           | 234200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0758   |
| total_timesteps    | 4684000  |
| value_loss         | 0.0088   |
---------------------------------
---------------------------------
| explained_variance | 0.518    |
| fps                | 744      |
| nupdates           | 234300   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0187  |
| total_timesteps    | 4686000  |
| value_loss         | 0.00107  |
---------------------------------
---------------------------------
| explained_variance | 0.566    |
| fps         

---------------------------------
| explained_variance | 0.829    |
| fps                | 744      |
| nupdates           | 236800   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.1     |
| total_timesteps    | 4736000  |
| value_loss         | 0.00185  |
---------------------------------
---------------------------------
| explained_variance | 0.16     |
| fps                | 744      |
| nupdates           | 236900   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0806   |
| total_timesteps    | 4738000  |
| value_loss         | 0.00422  |
---------------------------------
---------------------------------
| explained_variance | -0.217   |
| fps                | 744      |
| nupdates           | 237000   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.064    |
| total_timesteps    | 4740000  |
| value_loss         | 0.00247  |
---------------------------------
---------------------------------
| explained_variance | 0.889    |
| fps         

---------------------------------
| explained_variance | -57.6    |
| fps                | 744      |
| nupdates           | 239500   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.516    |
| total_timesteps    | 4790000  |
| value_loss         | 0.103    |
---------------------------------
---------------------------------
| explained_variance | 0.0349   |
| fps                | 744      |
| nupdates           | 239600   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0328   |
| total_timesteps    | 4792000  |
| value_loss         | 0.00184  |
---------------------------------
---------------------------------
| explained_variance | 0.717    |
| fps                | 744      |
| nupdates           | 239700   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.296   |
| total_timesteps    | 4794000  |
| value_loss         | 0.0672   |
---------------------------------
---------------------------------
| explained_variance | 0.711    |
| fps         

---------------------------------
| explained_variance | -0.92    |
| fps                | 744      |
| nupdates           | 242200   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0164   |
| total_timesteps    | 4844000  |
| value_loss         | 0.00124  |
---------------------------------
---------------------------------
| explained_variance | 0.654    |
| fps                | 744      |
| nupdates           | 242300   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.258   |
| total_timesteps    | 4846000  |
| value_loss         | 0.0815   |
---------------------------------
---------------------------------
| explained_variance | 0.548    |
| fps                | 745      |
| nupdates           | 242400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0794  |
| total_timesteps    | 4848000  |
| value_loss         | 0.00184  |
---------------------------------
Eval num_timesteps=4850000, episode_reward=-4.70 +/- 0.64
Episode length: 616.70 +

---------------------------------
| explained_variance | -0.483   |
| fps                | 745      |
| nupdates           | 244900   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.154    |
| total_timesteps    | 4898000  |
| value_loss         | 0.0103   |
---------------------------------
Eval num_timesteps=4900000, episode_reward=-5.00 +/- 0.00
Episode length: 533.00 +/- 72.30
---------------------------------
| explained_variance | 0.953    |
| fps                | 744      |
| nupdates           | 245000   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.024   |
| total_timesteps    | 4900000  |
| value_loss         | 9.24e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.747    |
| fps                | 744      |
| nupdates           | 245100   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0383   |
| total_timesteps    | 4902000  |
| value_loss         | 0.000984 |
-------------------------

---------------------------------
| explained_variance | -1.02    |
| fps                | 745      |
| nupdates           | 247600   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.199   |
| total_timesteps    | 4952000  |
| value_loss         | 0.00911  |
---------------------------------
----------------------------------
| explained_variance | 0.799     |
| fps                | 745       |
| nupdates           | 247700    |
| policy_entropy     | 2.05      |
| policy_loss        | -0.000853 |
| total_timesteps    | 4954000   |
| value_loss         | 0.000377  |
----------------------------------
---------------------------------
| explained_variance | 0.855    |
| fps                | 745      |
| nupdates           | 247800   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0202  |
| total_timesteps    | 4956000  |
| value_loss         | 0.000174 |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps

---------------------------------
| explained_variance | -3.1     |
| fps                | 745      |
| nupdates           | 250300   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.561   |
| total_timesteps    | 5006000  |
| value_loss         | 0.0735   |
---------------------------------
---------------------------------
| explained_variance | 0.405    |
| fps                | 745      |
| nupdates           | 250400   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0892   |
| total_timesteps    | 5008000  |
| value_loss         | 0.0287   |
---------------------------------
---------------------------------
| explained_variance | 0.746    |
| fps                | 745      |
| nupdates           | 250500   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0597  |
| total_timesteps    | 5010000  |
| value_loss         | 0.000481 |
---------------------------------
---------------------------------
| explained_variance | 0.0582   |
| fps         

---------------------------------
| explained_variance | 0.866    |
| fps                | 745      |
| nupdates           | 253000   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0549  |
| total_timesteps    | 5060000  |
| value_loss         | 0.000365 |
---------------------------------
---------------------------------
| explained_variance | 0.802    |
| fps                | 745      |
| nupdates           | 253100   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0595   |
| total_timesteps    | 5062000  |
| value_loss         | 0.0438   |
---------------------------------
---------------------------------
| explained_variance | 0.629    |
| fps                | 745      |
| nupdates           | 253200   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0124  |
| total_timesteps    | 5064000  |
| value_loss         | 0.0412   |
---------------------------------
---------------------------------
| explained_variance | 0.0449   |
| fps         

---------------------------------
| explained_variance | -0.345   |
| fps                | 745      |
| nupdates           | 255700   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.15    |
| total_timesteps    | 5114000  |
| value_loss         | 0.00539  |
---------------------------------
---------------------------------
| explained_variance | 0.731    |
| fps                | 745      |
| nupdates           | 255800   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0382  |
| total_timesteps    | 5116000  |
| value_loss         | 0.000386 |
---------------------------------
---------------------------------
| explained_variance | 0.768    |
| fps                | 745      |
| nupdates           | 255900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0255  |
| total_timesteps    | 5118000  |
| value_loss         | 0.00036  |
---------------------------------
---------------------------------
| explained_variance | 0.0725   |
| fps         

---------------------------------
| explained_variance | 0.89     |
| fps                | 745      |
| nupdates           | 258400   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0397   |
| total_timesteps    | 5168000  |
| value_loss         | 0.00122  |
---------------------------------
---------------------------------
| explained_variance | -2.22    |
| fps                | 745      |
| nupdates           | 258500   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.253   |
| total_timesteps    | 5170000  |
| value_loss         | 0.0212   |
---------------------------------
---------------------------------
| explained_variance | 0.843    |
| fps                | 745      |
| nupdates           | 258600   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0474  |
| total_timesteps    | 5172000  |
| value_loss         | 0.000534 |
---------------------------------
---------------------------------
| explained_variance | 0.176    |
| fps         

---------------------------------
| explained_variance | -131     |
| fps                | 745      |
| nupdates           | 261100   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.107    |
| total_timesteps    | 5222000  |
| value_loss         | 0.045    |
---------------------------------
---------------------------------
| explained_variance | -0.344   |
| fps                | 745      |
| nupdates           | 261200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.698    |
| total_timesteps    | 5224000  |
| value_loss         | 0.122    |
---------------------------------
Eval num_timesteps=5225000, episode_reward=-4.80 +/- 0.40
Episode length: 645.00 +/- 164.60
---------------------------------
| explained_variance | 0.908    |
| fps                | 745      |
| nupdates           | 261300   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.077   |
| total_timesteps    | 5226000  |
| value_loss         | 0.000961 |
------------------------

Eval num_timesteps=5275000, episode_reward=-5.00 +/- 0.00
Episode length: 571.70 +/- 85.81
---------------------------------
| explained_variance | 0.955    |
| fps                | 745      |
| nupdates           | 263800   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0175   |
| total_timesteps    | 5276000  |
| value_loss         | 0.000444 |
---------------------------------
---------------------------------
| explained_variance | 0.638    |
| fps                | 745      |
| nupdates           | 263900   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.00169 |
| total_timesteps    | 5278000  |
| value_loss         | 0.000207 |
---------------------------------
---------------------------------
| explained_variance | 0.845    |
| fps                | 745      |
| nupdates           | 264000   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.101   |
| total_timesteps    | 5280000  |
| value_loss         | 0.00186  |
-------------------------

---------------------------------
| explained_variance | 0.649    |
| fps                | 745      |
| nupdates           | 266400   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0609  |
| total_timesteps    | 5328000  |
| value_loss         | 0.0012   |
---------------------------------
---------------------------------
| explained_variance | 0.808    |
| fps                | 745      |
| nupdates           | 266500   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0272  |
| total_timesteps    | 5330000  |
| value_loss         | 0.000417 |
---------------------------------
---------------------------------
| explained_variance | 0.77     |
| fps                | 745      |
| nupdates           | 266600   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0648   |
| total_timesteps    | 5332000  |
| value_loss         | 0.00251  |
---------------------------------
---------------------------------
| explained_variance | 0.902    |
| fps         

---------------------------------
| explained_variance | -1.63    |
| fps                | 745      |
| nupdates           | 269100   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0617  |
| total_timesteps    | 5382000  |
| value_loss         | 0.00455  |
---------------------------------
---------------------------------
| explained_variance | 0.715    |
| fps                | 745      |
| nupdates           | 269200   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0699   |
| total_timesteps    | 5384000  |
| value_loss         | 0.0445   |
---------------------------------
---------------------------------
| explained_variance | 0.269    |
| fps                | 745      |
| nupdates           | 269300   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.0392   |
| total_timesteps    | 5386000  |
| value_loss         | 0.00113  |
---------------------------------
---------------------------------
| explained_variance | 0.825    |
| fps         

---------------------------------
| explained_variance | 0.687    |
| fps                | 745      |
| nupdates           | 271800   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0885  |
| total_timesteps    | 5436000  |
| value_loss         | 0.00172  |
---------------------------------
---------------------------------
| explained_variance | 0.946    |
| fps                | 745      |
| nupdates           | 271900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.00384 |
| total_timesteps    | 5438000  |
| value_loss         | 9.25e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.0102   |
| fps                | 745      |
| nupdates           | 272000   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0167   |
| total_timesteps    | 5440000  |
| value_loss         | 0.0414   |
---------------------------------
---------------------------------
| explained_variance | 0.935    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 745      |
| nupdates           | 274500   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0389  |
| total_timesteps    | 5490000  |
| value_loss         | 9.46e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.913    |
| fps                | 745      |
| nupdates           | 274600   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.00228 |
| total_timesteps    | 5492000  |
| value_loss         | 0.000144 |
---------------------------------
---------------------------------
| explained_variance | 0.774    |
| fps                | 745      |
| nupdates           | 274700   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.106   |
| total_timesteps    | 5494000  |
| value_loss         | 0.043    |
---------------------------------
---------------------------------
| explained_variance | 0.888    |
| fps         

---------------------------------
| explained_variance | 0.946    |
| fps                | 746      |
| nupdates           | 277200   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0171  |
| total_timesteps    | 5544000  |
| value_loss         | 1.96e-05 |
---------------------------------
---------------------------------
| explained_variance | 0.653    |
| fps                | 746      |
| nupdates           | 277300   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.482   |
| total_timesteps    | 5546000  |
| value_loss         | 0.108    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 746      |
| nupdates           | 277400   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0158  |
| total_timesteps    | 5548000  |
| value_loss         | 0.000144 |
---------------------------------
Eval num_timesteps=5550000, episode_reward=-4.90 +/- 0.30
Episode length: 619.50 +

---------------------------------
| explained_variance | 0.81     |
| fps                | 746      |
| nupdates           | 279900   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0539  |
| total_timesteps    | 5598000  |
| value_loss         | 0.00045  |
---------------------------------
Eval num_timesteps=5600000, episode_reward=-4.90 +/- 0.30
Episode length: 544.00 +/- 90.74
---------------------------------
| explained_variance | 0.471    |
| fps                | 745      |
| nupdates           | 280000   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0882   |
| total_timesteps    | 5600000  |
| value_loss         | 0.00324  |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 746      |
| nupdates           | 280100   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0217  |
| total_timesteps    | 5602000  |
| value_loss         | 0.000132 |
-------------------------

---------------------------------
| explained_variance | 0.957    |
| fps                | 745      |
| nupdates           | 282600   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0205   |
| total_timesteps    | 5652000  |
| value_loss         | 0.000423 |
---------------------------------
---------------------------------
| explained_variance | 0.532    |
| fps                | 745      |
| nupdates           | 282700   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0827   |
| total_timesteps    | 5654000  |
| value_loss         | 0.00322  |
---------------------------------
---------------------------------
| explained_variance | -1.25    |
| fps                | 745      |
| nupdates           | 282800   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.183   |
| total_timesteps    | 5656000  |
| value_loss         | 0.0118   |
---------------------------------
---------------------------------
| explained_variance | 0.719    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 745      |
| nupdates           | 285300   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0255  |
| total_timesteps    | 5706000  |
| value_loss         | 0.000121 |
---------------------------------
---------------------------------
| explained_variance | 0.567    |
| fps                | 745      |
| nupdates           | 285400   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0101   |
| total_timesteps    | 5708000  |
| value_loss         | 0.000419 |
---------------------------------
---------------------------------
| explained_variance | -79.8    |
| fps                | 745      |
| nupdates           | 285500   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0977   |
| total_timesteps    | 5710000  |
| value_loss         | 0.0397   |
---------------------------------
----------------------------------
| explained_variance | 0.953     |
| fps       

---------------------------------
| explained_variance | 0.114    |
| fps                | 745      |
| nupdates           | 288000   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0549   |
| total_timesteps    | 5760000  |
| value_loss         | 0.00285  |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 745      |
| nupdates           | 288100   |
| policy_entropy     | 2.08     |
| policy_loss        | 0.0314   |
| total_timesteps    | 5762000  |
| value_loss         | 0.000685 |
---------------------------------
---------------------------------
| explained_variance | -5.97    |
| fps                | 745      |
| nupdates           | 288200   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.271    |
| total_timesteps    | 5764000  |
| value_loss         | 0.0212   |
---------------------------------
---------------------------------
| explained_variance | 0.893    |
| fps         

---------------------------------
| explained_variance | -101     |
| fps                | 745      |
| nupdates           | 290700   |
| policy_entropy     | 2.06     |
| policy_loss        | 1.03     |
| total_timesteps    | 5814000  |
| value_loss         | 0.317    |
---------------------------------
---------------------------------
| explained_variance | 0.224    |
| fps                | 745      |
| nupdates           | 290800   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0622  |
| total_timesteps    | 5816000  |
| value_loss         | 0.0015   |
---------------------------------
---------------------------------
| explained_variance | 0.356    |
| fps                | 745      |
| nupdates           | 290900   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.102   |
| total_timesteps    | 5818000  |
| value_loss         | 0.00278  |
---------------------------------
---------------------------------
| explained_variance | 0.568    |
| fps         

---------------------------------
| explained_variance | 0.902    |
| fps                | 745      |
| nupdates           | 293400   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0142  |
| total_timesteps    | 5868000  |
| value_loss         | 0.000294 |
---------------------------------
---------------------------------
| explained_variance | 0.522    |
| fps                | 745      |
| nupdates           | 293500   |
| policy_entropy     | 2.07     |
| policy_loss        | -2.01    |
| total_timesteps    | 5870000  |
| value_loss         | 0.95     |
---------------------------------
---------------------------------
| explained_variance | 0.725    |
| fps                | 745      |
| nupdates           | 293600   |
| policy_entropy     | 2.08     |
| policy_loss        | -0.0149  |
| total_timesteps    | 5872000  |
| value_loss         | 0.000319 |
---------------------------------
---------------------------------
| explained_variance | 0.593    |
| fps         

---------------------------------
| explained_variance | 0.927    |
| fps                | 745      |
| nupdates           | 296100   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.0131   |
| total_timesteps    | 5922000  |
| value_loss         | 0.000295 |
---------------------------------
---------------------------------
| explained_variance | 0.753    |
| fps                | 745      |
| nupdates           | 296200   |
| policy_entropy     | 2.06     |
| policy_loss        | 0.0236   |
| total_timesteps    | 5924000  |
| value_loss         | 0.000566 |
---------------------------------
Eval num_timesteps=5925000, episode_reward=-4.90 +/- 0.30
Episode length: 583.70 +/- 80.71
---------------------------------
| explained_variance | -0.181   |
| fps                | 745      |
| nupdates           | 296300   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.211    |
| total_timesteps    | 5926000  |
| value_loss         | 0.0142   |
-------------------------

Eval num_timesteps=5975000, episode_reward=-5.00 +/- 0.00
Episode length: 581.50 +/- 145.39
---------------------------------
| explained_variance | 0.661    |
| fps                | 745      |
| nupdates           | 298800   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.037   |
| total_timesteps    | 5976000  |
| value_loss         | 0.000589 |
---------------------------------
---------------------------------
| explained_variance | 0.862    |
| fps                | 745      |
| nupdates           | 298900   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0584  |
| total_timesteps    | 5978000  |
| value_loss         | 0.00044  |
---------------------------------
---------------------------------
| explained_variance | 0.869    |
| fps                | 745      |
| nupdates           | 299000   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0134  |
| total_timesteps    | 5980000  |
| value_loss         | 0.000329 |
------------------------

---------------------------------
| explained_variance | 0.682    |
| fps                | 745      |
| nupdates           | 301400   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0793  |
| total_timesteps    | 6028000  |
| value_loss         | 0.00132  |
---------------------------------
---------------------------------
| explained_variance | 0.809    |
| fps                | 745      |
| nupdates           | 301500   |
| policy_entropy     | 2.03     |
| policy_loss        | 0.00468  |
| total_timesteps    | 6030000  |
| value_loss         | 0.0412   |
---------------------------------
---------------------------------
| explained_variance | 0.926    |
| fps                | 745      |
| nupdates           | 301600   |
| policy_entropy     | 2.04     |
| policy_loss        | 0.0154   |
| total_timesteps    | 6032000  |
| value_loss         | 0.000309 |
---------------------------------
---------------------------------
| explained_variance | 0.386    |
| fps         

---------------------------------
| explained_variance | 0.947    |
| fps                | 745      |
| nupdates           | 304100   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.022    |
| total_timesteps    | 6082000  |
| value_loss         | 0.000554 |
---------------------------------
---------------------------------
| explained_variance | 0.702    |
| fps                | 745      |
| nupdates           | 304200   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.0942  |
| total_timesteps    | 6084000  |
| value_loss         | 0.00204  |
---------------------------------
---------------------------------
| explained_variance | 0.912    |
| fps                | 745      |
| nupdates           | 304300   |
| policy_entropy     | 2.05     |
| policy_loss        | 0.0858   |
| total_timesteps    | 6086000  |
| value_loss         | 0.00306  |
---------------------------------
---------------------------------
| explained_variance | 0.881    |
| fps         

---------------------------------
| explained_variance | 0.395    |
| fps                | 745      |
| nupdates           | 306800   |
| policy_entropy     | 2.02     |
| policy_loss        | -0.603   |
| total_timesteps    | 6136000  |
| value_loss         | 0.145    |
---------------------------------
---------------------------------
| explained_variance | 0.945    |
| fps                | 745      |
| nupdates           | 306900   |
| policy_entropy     | 2.01     |
| policy_loss        | 0.0572   |
| total_timesteps    | 6138000  |
| value_loss         | 0.0017   |
---------------------------------
---------------------------------
| explained_variance | 0.583    |
| fps                | 745      |
| nupdates           | 307000   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.161   |
| total_timesteps    | 6140000  |
| value_loss         | 0.0615   |
---------------------------------
---------------------------------
| explained_variance | 0.791    |
| fps         

---------------------------------
| explained_variance | 0.724    |
| fps                | 743      |
| nupdates           | 309500   |
| policy_entropy     | 2.02     |
| policy_loss        | -0.139   |
| total_timesteps    | 6190000  |
| value_loss         | 0.0481   |
---------------------------------
---------------------------------
| explained_variance | 0.792    |
| fps                | 743      |
| nupdates           | 309600   |
| policy_entropy     | 2.01     |
| policy_loss        | -0.0429  |
| total_timesteps    | 6192000  |
| value_loss         | 0.00075  |
---------------------------------
---------------------------------
| explained_variance | 0.76     |
| fps                | 743      |
| nupdates           | 309700   |
| policy_entropy     | 2.04     |
| policy_loss        | -0.0541  |
| total_timesteps    | 6194000  |
| value_loss         | 0.000484 |
---------------------------------
---------------------------------
| explained_variance | 0.825    |
| fps         

---------------------------------
| explained_variance | 0.841    |
| fps                | 740      |
| nupdates           | 312200   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.1     |
| total_timesteps    | 6244000  |
| value_loss         | 0.00199  |
---------------------------------
---------------------------------
| explained_variance | -5.95    |
| fps                | 740      |
| nupdates           | 312300   |
| policy_entropy     | 2.05     |
| policy_loss        | -0.0543  |
| total_timesteps    | 6246000  |
| value_loss         | 0.00514  |
---------------------------------
---------------------------------
| explained_variance | 0.877    |
| fps                | 740      |
| nupdates           | 312400   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0366  |
| total_timesteps    | 6248000  |
| value_loss         | 0.000114 |
---------------------------------
Eval num_timesteps=6250000, episode_reward=-4.90 +/- 0.30
Episode length: 504.00 +

---------------------------------
| explained_variance | 0.339    |
| fps                | 737      |
| nupdates           | 314900   |
| policy_entropy     | 2.07     |
| policy_loss        | 0.064    |
| total_timesteps    | 6298000  |
| value_loss         | 0.00226  |
---------------------------------
Eval num_timesteps=6300000, episode_reward=-5.00 +/- 0.00
Episode length: 633.80 +/- 140.94
---------------------------------
| explained_variance | 0.744    |
| fps                | 736      |
| nupdates           | 315000   |
| policy_entropy     | 2.07     |
| policy_loss        | -0.253   |
| total_timesteps    | 6300000  |
| value_loss         | 0.0641   |
---------------------------------
---------------------------------
| explained_variance | -1.65    |
| fps                | 736      |
| nupdates           | 315100   |
| policy_entropy     | 2.06     |
| policy_loss        | -0.0572  |
| total_timesteps    | 6302000  |
| value_loss         | 0.00606  |
------------------------

KeyboardInterrupt: 

In [7]:
# Save the ACKTR model currently held in `trained_model` under its log directory.
model_str = 'acktr'
LOGDIR = "sb-" + model_str
# Create the directory up front so this cell does not rely on an earlier cell
# having made it (NOTE(review): the model's save() is presumed not to create
# missing parent directories — confirm against the stable-baselines version in use).
os.makedirs(LOGDIR, exist_ok=True)
# Look the model up via `model_str` so the dictionary key and the directory
# name are derived from one value and cannot drift apart.
trained_model[model_str].save(os.path.join(LOGDIR, "final_model"))

In [8]:
# Record five episodes of the ACKTR agent to an mp4 file and embed it below.
video_filename = 'acktr.mp4'
# Bind the environment to a name so it can be closed afterwards; creating it
# inline in the call left it open with no handle to release it.
eval_env = gym.make("SlimeVolley-v0")
try:
    record_game(
        model=trained_model['acktr'],
        env=eval_env,
        num_episodes=5,
        video_filename=video_filename,
    )
finally:
    # Release the environment's resources even if recording is interrupted.
    eval_env.close()
# Keep this as the last top-level expression so the notebook renders the video.
embed_mp4(video_filename)



score: -5
score: -5
score: -5
score: -5
score: -5
