# Pommerman V4 Training in smaller central region

This notebook demonstrates how to train Pommerman agents. Please let us know at support@pommerman.com if you run into any issues.

In [1]:
import os
import sys
import numpy as np
import time

import pommerman
from pommerman.agents import SimpleAgent, RandomAgent, PlayerAgent, BaseAgent, StaticAgent, RandomAgentNoBomb, RandomAgent, SmartRandomAgentNoBomb
from pommerman.configs import lesson1_1v1_env, lesson2a_1v1_env, lesson2e_1v1_env, lesson3a_1v1_env, lesson3e_1v1_env, lesson4a_1v1_env
from pommerman.envs.v0 import Pomme as Pomme_v0
from pommerman.envs.v4 import Pomme as Pomme_v4
from pommerman.characters import Bomber
from pommerman import utility
from pommerman import agents
from pommerman import envs
from pommerman import constants
from pommerman import characters

# print all env configs
print(pommerman.REGISTRY)

['AdvancedLesson_1v1-v0', 'AdvancedLessonTeam-v0', 'PommeFFACompetition-v0', 'PommeFFACompetitionFast-v0', 'PommeFFAFast-v0', 'PommeFFA-v1', 'PommeFFAFast-v3', 'PommeFFAFast-v4', 'Lesson1_1v1-v0', 'Lesson1-v0', 'Lesson2Team-v0', 'Lesson2a_1v1-v0', 'Lesson2b_1v1-v0', 'Lesson2bTeam-v0', 'Lesson2c_1v1-v0', 'Lesson2c-v0', 'Lesson2d_1v1-v0', 'Lesson2d-v0', 'Lesson2e-v0', 'Lesson2eTeam-v0', 'Lesson3Team-v0', 'Lesson3a_1v1-v0', 'Lesson3b_1v1-v0', 'Lesson3bTeam-v0', 'Lesson3c-v0', 'Lesson3cTeam-v0', 'Lesson3d_1v1-v0', 'Lesson3dTeam-v0', 'OneVsOne-v0', 'PommeRadioCompetition-v2', 'PommeRadio-v2', 'Simple-v0', 'SimpleRandomTeam-v0', 'SimpleTeam-v0', 'PommeTeamCompetition-v0', 'PommeTeamCompetitionFast-v0', 'PommeTeamCompetition-v1', 'PommeTeam-v0', 'PommeTeamFast-v0', 'PommeTeamSimple-v0']


# Train with Stable Baselines

In [2]:
import gym

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines import PPO2

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



## Subclass the Pommerman env and make it compatible with Stable Baselines

In [3]:
# class CustomPomme(Pomme_v4):
class CustomPomme(Pomme_v0):
    """Pommerman environment exposing a single-agent (gym-style) interface.

    The caller supplies the action of one agent only (the one at
    ``training_idx``); the actions of all other agents are obtained from
    ``self.act``. Observations are flattened into one list and only the
    training agent's reward is returned.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.obs_raw = None  # latest un-flattened observations returned by the parent env
        self.training_idx = 1  # index of the agent being trained

    def _transform_obs(self, obs_raw):
        """Flatten the training agent's observation into a single list.

        Args:
            obs_raw: per-agent observations; indexed with ``training_idx``.

        Returns:
            A flat list laid out as: board, bomb_blast_strength, bomb_life
            (each flattened in full, one after another), position, ammo,
            blast_strength, can_kick, teammate value and three enemy values.
        """
        obs_training = obs_raw[self.training_idx]  # observation of the agent at training_idx

        # construct flattened observation
        obs = [
            *np.array(obs_training["board"]).reshape(-1),
            *np.array(obs_training["bomb_blast_strength"]).reshape(-1),
            *np.array(obs_training["bomb_life"]).reshape(-1),
            *np.array(obs_training["position"]).reshape(-1),
            obs_training["ammo"],
            obs_training["blast_strength"],
            obs_training["can_kick"],
            obs_training["teammate"].value,
            obs_training["enemies"][0].value,

            # uncomment if training 1 v 1
            # (enemies[0] is repeated in the two remaining enemy slots)
            obs_training["enemies"][0].value,
            obs_training["enemies"][0].value,

            # uncomment if training 2 v 2
#             obs_training["enemies"][1].value,
#             obs_training["enemies"][2].value,
        ]
        return obs

    def get_obs_raw(self):
        """Return the most recent un-flattened observations (None before reset)."""
        return self.obs_raw

    def step(self, action_training):
        """Advance the game by one step using the given action for the trained agent.

        Args:
            action_training: action of the agent being trained.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` is the flattened
            observation and ``reward`` is the entry at ``training_idx``.
        """
        action_nontraining = self.act(self.obs_raw)
        # NOTE(review): the trained agent's action is appended last, which
        # assumes the trained agent is the last agent in the env -- confirm
        # before changing the agent layout.
        actions = [*action_nontraining, action_training]
        obs_raw, reward, done, info = super().step(actions)
        self.obs_raw = obs_raw
        return self._transform_obs(obs_raw), reward[self.training_idx], done, info

    def reset(self):
        """Reset the parent env, cache the raw observations and return the flat one."""
        obs_raw = super().reset()
        self.obs_raw = obs_raw
        return self._transform_obs(obs_raw)

    def render(self,
               mode=None,
               close=False,
               record_pngs_dir=None,
               record_json_dir=None,
               do_sleep=True):
        """Render via the parent env, forwarding all arguments unchanged.

        Returns:
            Whatever the parent ``render`` returns. Previously this value was
            dropped, so callers could never receive a rendered frame.
        """
        return super().render(mode=mode,
                              close=close,
                              record_pngs_dir=record_pngs_dir,
                              record_json_dir=record_json_dir,
                              do_sleep=do_sleep)

# Custom CNN Policy

In [10]:
import tensorflow as tf

from stable_baselines.a2c.utils import linear
from stable_baselines.common.policies import ActorCriticPolicy

class CustomCNN(ActorCriticPolicy):
    """Actor-critic policy that runs a small CNN over the board planes.

    The flat observation is split into a board part (the first
    ``3 * size**2`` values) and a remainder of scalar features. The board part
    goes through two conv layers, is flattened, concatenated with the scalar
    features and fed to a shared dense layer that feeds both the policy and
    the value heads.

    Extra ``**kwargs`` are accepted but ignored.
    """

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(CustomCNN, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse)
        size = 11  # board side length
        n_planes = 3  # board, bomb_blast_strength, bomb_life
        bp = n_planes * size ** 2  # board partition: number of leading board values
        with tf.variable_scope("model", reuse=reuse):
            obs = self.processed_obs
            self.board1, self.misc = tf.split(obs, [bp, -1], 1)

            # CustomPomme._transform_obs writes the three planes one after
            # another (plane-major). Reshape to (batch, planes, H, W) first and
            # only then move the planes to the channel axis; reshaping directly
            # to (batch, H, W, planes) would mix cells of one plane into
            # "channels" and destroy the spatial layout.
            planes = tf.reshape(self.board1, (-1, n_planes, size, size))
            self.board = tf.transpose(planes, (0, 2, 3, 1))
            self.conv1 = tf.layers.conv2d(inputs=self.board, filters=64, kernel_size=2, padding='VALID', activation=tf.nn.relu, name='conv1')
            self.conv2 = tf.layers.conv2d(inputs=self.conv1, filters=32, kernel_size=2, padding='VALID', activation=tf.nn.relu, name='conv2')
            self.fc0 = tf.contrib.layers.flatten(self.conv2)
            self.fc1 = tf.concat((self.fc0, self.misc), -1)
            self.fc1 = tf.layers.dense(self.fc1, 1024, name = 'fc1')
            # Latent vectors handed to the probability-distribution builder:
            # `actions` is the policy latent, `valueUM` the value latent.
            self.actions = tf.layers.dense(self.fc1, 6)
            self.valueUM = tf.layers.dense(self.fc1, 128)

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(self.actions, self.valueUM, init_scale=0.01)

        # NOTE(review): the value head is created outside the "model" variable
        # scope; kept as-is so variable names stay unchanged.
        self._value_fn = linear(self.valueUM, 'vf', 1)
        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        """Run the policy on ``obs``.

        ``state`` and ``mask`` are accepted but not used.

        Returns:
            Tuple ``(action, value, self.initial_state, neglogp)``; the action
            is ``deterministic_action`` when ``deterministic`` is true and
            ``action`` otherwise.
        """
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value, self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        """Evaluate ``policy_proba`` for ``obs`` (``state``/``mask`` unused)."""
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        """Evaluate ``value_flat`` for ``obs`` (``state``/``mask`` unused)."""
        return self.sess.run(self.value_flat, {self.obs_ph: obs})

In [5]:
# def team_v3_fast_env():
#     """Start up a FFA config with the default settings."""
#     env = CustomPomme
#     game_type = constants.GameType.Team
#     env_entry_point = 'CustomPomme'
#     env_id = 'PommeTeamFast-v3'
#     env_kwargs = {
#         'game_type': game_type,
#         'board_size': 8,
#         'num_rigid': 0,
#         'num_wood': 0,
#         'num_items': 0,
#         'max_steps': constants.MAX_STEPS,
#         'render_fps': 1000,
#         'env': env_entry_point,
#     }
#     agent = characters.Bomber
#     return locals()

def one_vs_one_v3_env():
    """Build the config for the 'PommeOneVsOneFast-v3' one-vs-one environment.

    Returns:
        dict with the keys ``env``, ``game_type``, ``env_entry_point``,
        ``env_id``, ``env_kwargs`` and ``agent``.
    """
    mode = constants.GameType.OneVsOne
    entry_point = 'CustomPomme'
    settings = dict(
        game_type=mode,
        board_size=11,
        num_rigid=0,
        num_wood=4,
        num_items=0,
        max_steps=constants.MAX_STEPS,
        render_fps=1000,
        rand_agent_pos=True,
        env=entry_point,
    )
    # Same keys the original exposed through its local variable names.
    return {
        'env': CustomPomme,
        'game_type': mode,
        'env_entry_point': entry_point,
        'env_id': 'PommeOneVsOneFast-v3',
        'env_kwargs': settings,
        'agent': characters.Bomber,
    }

def one_vs_one_v4_env():
    """Build the config for the 'PommeOneVsOneFast-v4' one-vs-one environment.

    Compared with the v3 config this one also passes ``free_board_size`` and
    uses a larger ``num_wood``.

    Returns:
        dict with the keys ``env``, ``game_type``, ``env_entry_point``,
        ``env_id``, ``env_kwargs`` and ``agent``.
    """
    mode = constants.GameType.OneVsOne
    entry_point = 'CustomPomme'
    settings = dict(
        game_type=mode,
        board_size=11,
        free_board_size=8,
        num_rigid=0,
        num_wood=32,
        num_items=0,
        max_steps=constants.MAX_STEPS,
        render_fps=1000,
        rand_agent_pos=True,
        env=entry_point,
    )
    # Same keys the original exposed through its local variable names.
    return {
        'env': CustomPomme,
        'game_type': mode,
        'env_entry_point': entry_point,
        'env_id': 'PommeOneVsOneFast-v4',
        'env_kwargs': settings,
        'agent': characters.Bomber,
    }

In [16]:
# Instantiate the environment from the lesson-1 one-vs-one config

config = lesson1_1v1_env()
env_pom = CustomPomme(**config["env_kwargs"])


# Configure agents.
# NOTE(review): this rebinds `agents`, which the import cell bound to the
# `pommerman.agents` module; the module is no longer reachable under that name.
agents = []

# Add the non-training opponent (a single StaticAgent with id 0)
for agent_id in range(1):
    agents.append(StaticAgent(config["agent"](agent_id, config["game_type"])))

# Add the player agent (the one to train) with id 1
agents.append(PlayerAgent(config["agent"](1, config["game_type"])))

env_pom.set_agents(agents)
# The training agent is the second entry, matching CustomPomme.training_idx = 1
env_pom.set_training_agent(agents[1].agent_id)
env_pom.set_init_game_state(None)

# Seed the environment (no reset is performed in this cell)
env_pom.seed(0)

[0]

In [17]:
# log function during training, implement if needed
def log(local_var, global_var):
    """Training callback that does nothing; fill in if logging is needed.

    Both arguments are ignored and ``None`` is returned.
    """
    # display(local_var)
    # display(global_var)
    return None

In [24]:
import time
n_cpu = 1
# NOTE(review): every factory in this list returns the same `env_pom` object,
# so with n_cpu > 1 all slots would share one environment instance.
env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])

# Alternative: start a fresh model with the custom CNN policy.
# model = PPO2(policy=CustomCNN, 
#              env=env, 
#              verbose=1, 
#              n_steps=3000, # batch_size = n_step * num_env
#              ent_coef=0.001, # entropy coefficient
#              tensorboard_log="./PPO2_CNNPolicy_tensorboard/")

# Alternative: start a fresh model with the built-in MLP policy.
# model = PPO2(policy='MlpPolicy', 
#              env=env, 
#              verbose=1, 
#              n_steps=3000, # batch_size = n_step * num_env
#              ent_coef=0.001, # entropy coefficient
#              tensorboard_log="./PPO2_pommerman_tensorboard/")

# Continue training from the previously saved checkpoint.
model = PPO2.load(load_path='PPO2_MlpPolicy_lesson1_Stable_4M', 
                  env=env)
model.tensorboard_log = "./PPO2_pommerman_tensorboard/"

# Time the training run (wall-clock seconds).
startTime = time.time()
model = model.learn(total_timesteps=4000000, # num_update = total_timesteps // batch_size
                    callback=log)
endTime = time.time()
elapsedTime = endTime - startTime
print(elapsedTime)

# Save under a new name so the loaded checkpoint is not overwritten.
model.save("PPO2_MlpPolicy_lesson1_Stable_8M")

--------------------------------------
| approxkl           | 0.003051533   |
| clipfrac           | 0.02933333    |
| explained_variance | 0.201         |
| fps                | 1492          |
| n_updates          | 1             |
| policy_entropy     | 1.1539589     |
| policy_loss        | -0.0068770084 |
| serial_timesteps   | 3000          |
| time_elapsed       | 3.81e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.010225196   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0026689176 |
| clipfrac           | 0.028083334  |
| explained_variance | 0.224        |
| fps                | 1666         |
| n_updates          | 2            |
| policy_entropy     | 1.1885583    |
| policy_loss        | -0.003852942 |
| serial_timesteps   | 6000         |
| time_elapsed       | 2.01         |
| total_timesteps    | 6000         |
| value_loss         | 0.014788186  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0008716986  |
| clipfrac           | 0.007916667   |
| explained_variance | 0.339         |
| fps                | 1707          |
| n_updates          | 18            |
| policy_entropy     | 1.0582573     |
| policy_loss        | -0.0032159938 |
| serial_timesteps   | 54000         |
| time_elapsed       | 31.2          |
| total_timesteps    | 54000         |
| value_loss         | 0.009130327   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0038164894 |
| clipfrac           | 0.054500002  |
| explained_variance | 0.181        |
| fps                | 1669         |
| n_updates          | 19           |
| policy_entropy     | 1.0709889    |
| policy_loss        | -0.006219391 |
| serial_timesteps   | 57000        |
| time_elapsed       | 33           |
| total_timesteps    | 57000        |
| value_loss         | 0.010033565  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0015151575  |
| clipfrac           | 0.013583333   |
| explained_variance | 0.554         |
| fps                | 1636          |
| n_updates          | 35            |
| policy_entropy     | 1.072666      |
| policy_loss        | -0.0058092363 |
| serial_timesteps   | 105000        |
| time_elapsed       | 62.1          |
| total_timesteps    | 105000        |
| value_loss         | 0.013309544   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0031968858  |
| clipfrac           | 0.045         |
| explained_variance | 0.306         |
| fps                | 1628          |
| n_updates          | 36            |
| policy_entropy     | 1.0949414     |
| policy_loss        | -0.0044374205 |
| serial_timesteps   | 108000        |
| time_elapsed       | 63.9          |
| total_timesteps    | 108000        |
| value_loss         | 0.009851315   |
-------------------------

--------------------------------------
| approxkl           | 0.0010929266  |
| clipfrac           | 0.009083333   |
| explained_variance | 0.807         |
| fps                | 1614          |
| n_updates          | 52            |
| policy_entropy     | 1.1602792     |
| policy_loss        | -0.0039409935 |
| serial_timesteps   | 156000        |
| time_elapsed       | 93.3          |
| total_timesteps    | 156000        |
| value_loss         | 0.011659163   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0015833462 |
| clipfrac           | 0.012416666  |
| explained_variance | 0.681        |
| fps                | 1602         |
| n_updates          | 53           |
| policy_entropy     | 1.1355269    |
| policy_loss        | -0.002823684 |
| serial_timesteps   | 159000       |
| time_elapsed       | 95.1         |
| total_timesteps    | 159000       |
| value_loss         | 0.00866971   |
-------------------------------------

--------------------------------------
| approxkl           | 0.0014597855  |
| clipfrac           | 0.011         |
| explained_variance | 0.867         |
| fps                | 1650          |
| n_updates          | 69            |
| policy_entropy     | 1.1079142     |
| policy_loss        | -0.0015565425 |
| serial_timesteps   | 207000        |
| time_elapsed       | 124           |
| total_timesteps    | 207000        |
| value_loss         | 0.0087893065  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0013234331 |
| clipfrac           | 0.009500001  |
| explained_variance | 0.453        |
| fps                | 1735         |
| n_updates          | 70           |
| policy_entropy     | 1.0850458    |
| policy_loss        | -0.002597121 |
| serial_timesteps   | 210000       |
| time_elapsed       | 126          |
| total_timesteps    | 210000       |
| value_loss         | 0.00916257   |
-------------------------------------

-------------------------------------
| approxkl           | 0.002357487  |
| clipfrac           | 0.025333334  |
| explained_variance | 0.685        |
| fps                | 1700         |
| n_updates          | 86           |
| policy_entropy     | 1.1017282    |
| policy_loss        | -0.002500682 |
| serial_timesteps   | 258000       |
| time_elapsed       | 155          |
| total_timesteps    | 258000       |
| value_loss         | 0.008299709  |
-------------------------------------
--------------------------------------
| approxkl           | 0.002559229   |
| clipfrac           | 0.014333334   |
| explained_variance | 0.777         |
| fps                | 1659          |
| n_updates          | 87            |
| policy_entropy     | 1.0638746     |
| policy_loss        | -0.0024783267 |
| serial_timesteps   | 261000        |
| time_elapsed       | 157           |
| total_timesteps    | 261000        |
| value_loss         | 0.008332101   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0014572754  |
| clipfrac           | 0.0046666665  |
| explained_variance | 0.591         |
| fps                | 1660          |
| n_updates          | 103           |
| policy_entropy     | 1.099846      |
| policy_loss        | -0.0027036518 |
| serial_timesteps   | 309000        |
| time_elapsed       | 186           |
| total_timesteps    | 309000        |
| value_loss         | 0.009144481   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00086253823 |
| clipfrac           | 0.00275       |
| explained_variance | 0.545         |
| fps                | 1659          |
| n_updates          | 104           |
| policy_entropy     | 1.0958326     |
| policy_loss        | -0.003300115  |
| serial_timesteps   | 312000        |
| time_elapsed       | 188           |
| total_timesteps    | 312000        |
| value_loss         | 0.0097866645  |
-------------------------

--------------------------------------
| approxkl           | 0.0016464044  |
| clipfrac           | 0.018166667   |
| explained_variance | 0.789         |
| fps                | 1708          |
| n_updates          | 120           |
| policy_entropy     | 0.82428193    |
| policy_loss        | -0.0019473543 |
| serial_timesteps   | 360000        |
| time_elapsed       | 217           |
| total_timesteps    | 360000        |
| value_loss         | 0.008450125   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002502056   |
| clipfrac           | 0.024         |
| explained_variance | 0.769         |
| fps                | 1653          |
| n_updates          | 121           |
| policy_entropy     | 0.9994141     |
| policy_loss        | -0.0036603676 |
| serial_timesteps   | 363000        |
| time_elapsed       | 219           |
| total_timesteps    | 363000        |
| value_loss         | 0.008487893   |
-------------------------

--------------------------------------
| approxkl           | 0.0004953482  |
| clipfrac           | 0.0010833333  |
| explained_variance | 0.72          |
| fps                | 1668          |
| n_updates          | 137           |
| policy_entropy     | 0.99689287    |
| policy_loss        | -0.0019647535 |
| serial_timesteps   | 411000        |
| time_elapsed       | 247           |
| total_timesteps    | 411000        |
| value_loss         | 0.00897404    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0029220264 |
| clipfrac           | 0.04666667   |
| explained_variance | 0.68         |
| fps                | 1719         |
| n_updates          | 138          |
| policy_entropy     | 0.71100926   |
| policy_loss        | -0.003381903 |
| serial_timesteps   | 414000       |
| time_elapsed       | 249          |
| total_timesteps    | 414000       |
| value_loss         | 0.008508226  |
-------------------------------------

-------------------------------------
| approxkl           | 0.0027202151 |
| clipfrac           | 0.028416667  |
| explained_variance | 0.628        |
| fps                | 1663         |
| n_updates          | 154          |
| policy_entropy     | 1.0209819    |
| policy_loss        | -0.004007143 |
| serial_timesteps   | 462000       |
| time_elapsed       | 278          |
| total_timesteps    | 462000       |
| value_loss         | 0.00822461   |
-------------------------------------
-------------------------------------
| approxkl           | 0.0025557226 |
| clipfrac           | 0.035499997  |
| explained_variance | 0.724        |
| fps                | 1665         |
| n_updates          | 155          |
| policy_entropy     | 0.9013314    |
| policy_loss        | -0.002894692 |
| serial_timesteps   | 465000       |
| time_elapsed       | 280          |
| total_timesteps    | 465000       |
| value_loss         | 0.008991837  |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.0024854315 |
| clipfrac           | 0.031333335  |
| explained_variance | 0.869        |
| fps                | 1643         |
| n_updates          | 171          |
| policy_entropy     | 0.8610856    |
| policy_loss        | -0.004221265 |
| serial_timesteps   | 513000       |
| time_elapsed       | 309          |
| total_timesteps    | 513000       |
| value_loss         | 0.009816072  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0020442293  |
| clipfrac           | 0.021         |
| explained_variance | 0.786         |
| fps                | 1664          |
| n_updates          | 172           |
| policy_entropy     | 0.89697564    |
| policy_loss        | -0.0036012153 |
| serial_timesteps   | 516000        |
| time_elapsed       | 310           |
| total_timesteps    | 516000        |
| value_loss         | 0.00964969    |
--------------------------------------

--------------------------------------
| approxkl           | 0.0012610842  |
| clipfrac           | 0.007666667   |
| explained_variance | 0.874         |
| fps                | 1590          |
| n_updates          | 188           |
| policy_entropy     | 0.8971347     |
| policy_loss        | -0.0035872904 |
| serial_timesteps   | 564000        |
| time_elapsed       | 340           |
| total_timesteps    | 564000        |
| value_loss         | 0.011658463   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0011802847  |
| clipfrac           | 0.011083333   |
| explained_variance | 0.933         |
| fps                | 1566          |
| n_updates          | 189           |
| policy_entropy     | 0.9329204     |
| policy_loss        | -0.0039355825 |
| serial_timesteps   | 567000        |
| time_elapsed       | 342           |
| total_timesteps    | 567000        |
| value_loss         | 0.0073624346  |
-------------------------

--------------------------------------
| approxkl           | 0.0015220032  |
| clipfrac           | 0.014583333   |
| explained_variance | 0.926         |
| fps                | 1640          |
| n_updates          | 205           |
| policy_entropy     | 0.81373596    |
| policy_loss        | -0.0034861797 |
| serial_timesteps   | 615000        |
| time_elapsed       | 371           |
| total_timesteps    | 615000        |
| value_loss         | 0.008585139   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00093537953 |
| clipfrac           | 0.00525       |
| explained_variance | 0.91          |
| fps                | 1636          |
| n_updates          | 206           |
| policy_entropy     | 0.88661       |
| policy_loss        | -0.0026466062 |
| serial_timesteps   | 618000        |
| time_elapsed       | 373           |
| total_timesteps    | 618000        |
| value_loss         | 0.0080334665  |
-------------------------

-------------------------------------
| approxkl           | 0.0021440694 |
| clipfrac           | 0.023833334  |
| explained_variance | 0.879        |
| fps                | 1644         |
| n_updates          | 222          |
| policy_entropy     | 0.98567563   |
| policy_loss        | -0.002903676 |
| serial_timesteps   | 666000       |
| time_elapsed       | 402          |
| total_timesteps    | 666000       |
| value_loss         | 0.008627416  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00089535944 |
| clipfrac           | 0.004916667   |
| explained_variance | 0.878         |
| fps                | 1659          |
| n_updates          | 223           |
| policy_entropy     | 0.903163      |
| policy_loss        | -0.0015192726 |
| serial_timesteps   | 669000        |
| time_elapsed       | 404           |
| total_timesteps    | 669000        |
| value_loss         | 0.008161627   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0025917944  |
| clipfrac           | 0.036083333   |
| explained_variance | 0.892         |
| fps                | 1677          |
| n_updates          | 239           |
| policy_entropy     | 0.93525434    |
| policy_loss        | -0.0026166635 |
| serial_timesteps   | 717000        |
| time_elapsed       | 433           |
| total_timesteps    | 717000        |
| value_loss         | 0.007932107   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014462118  |
| clipfrac           | 0.021         |
| explained_variance | 0.931         |
| fps                | 1701          |
| n_updates          | 240           |
| policy_entropy     | 0.7686643     |
| policy_loss        | -0.0018315057 |
| serial_timesteps   | 720000        |
| time_elapsed       | 435           |
| total_timesteps    | 720000        |
| value_loss         | 0.0078203045  |
-------------------------

--------------------------------------
| approxkl           | 0.0010863335  |
| clipfrac           | 0.00875       |
| explained_variance | 0.833         |
| fps                | 1649          |
| n_updates          | 256           |
| policy_entropy     | 0.7178266     |
| policy_loss        | -0.0024714037 |
| serial_timesteps   | 768000        |
| time_elapsed       | 464           |
| total_timesteps    | 768000        |
| value_loss         | 0.008655054   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015330544  |
| clipfrac           | 0.01975       |
| explained_variance | 0.923         |
| fps                | 1677          |
| n_updates          | 257           |
| policy_entropy     | 0.72144365    |
| policy_loss        | -0.0026497478 |
| serial_timesteps   | 771000        |
| time_elapsed       | 466           |
| total_timesteps    | 771000        |
| value_loss         | 0.0077761915  |
-------------------------

--------------------------------------
| approxkl           | 0.0023681868  |
| clipfrac           | 0.035916667   |
| explained_variance | 0.902         |
| fps                | 1647          |
| n_updates          | 273           |
| policy_entropy     | 0.8913819     |
| policy_loss        | -0.0026223094 |
| serial_timesteps   | 819000        |
| time_elapsed       | 495           |
| total_timesteps    | 819000        |
| value_loss         | 0.0080924565  |
--------------------------------------
--------------------------------------
| approxkl           | 0.000601153   |
| clipfrac           | 0.0011666666  |
| explained_variance | 0.932         |
| fps                | 1643          |
| n_updates          | 274           |
| policy_entropy     | 0.938516      |
| policy_loss        | -0.0013367869 |
| serial_timesteps   | 822000        |
| time_elapsed       | 497           |
| total_timesteps    | 822000        |
| value_loss         | 0.0073082186  |
-------------------------

-------------------------------------
| approxkl           | 0.0042331675 |
| clipfrac           | 0.049583334  |
| explained_variance | 0.909        |
| fps                | 1636         |
| n_updates          | 290          |
| policy_entropy     | 1.0414388    |
| policy_loss        | -0.004075779 |
| serial_timesteps   | 870000       |
| time_elapsed       | 526          |
| total_timesteps    | 870000       |
| value_loss         | 0.007716071  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0023657256 |
| clipfrac           | 0.0185       |
| explained_variance | 0.896        |
| fps                | 1668         |
| n_updates          | 291          |
| policy_entropy     | 1.0403261    |
| policy_loss        | -0.002922807 |
| serial_timesteps   | 873000       |
| time_elapsed       | 528          |
| total_timesteps    | 873000       |
| value_loss         | 0.008230867  |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.0018793022 |
| clipfrac           | 0.017416667  |
| explained_variance | 0.902        |
| fps                | 1582         |
| n_updates          | 307          |
| policy_entropy     | 1.0632272    |
| policy_loss        | -0.003421577 |
| serial_timesteps   | 921000       |
| time_elapsed       | 557          |
| total_timesteps    | 921000       |
| value_loss         | 0.008006696  |
-------------------------------------
-------------------------------------
| approxkl           | 0.001316272  |
| clipfrac           | 0.008166667  |
| explained_variance | 0.781        |
| fps                | 1622         |
| n_updates          | 308          |
| policy_entropy     | 1.0881227    |
| policy_loss        | -0.003904386 |
| serial_timesteps   | 924000       |
| time_elapsed       | 559          |
| total_timesteps    | 924000       |
| value_loss         | 0.00834712   |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.001956591  |
| clipfrac           | 0.016916666  |
| explained_variance | 0.91         |
| fps                | 1610         |
| n_updates          | 324          |
| policy_entropy     | 1.0296713    |
| policy_loss        | -0.00441659  |
| serial_timesteps   | 972000       |
| time_elapsed       | 589          |
| total_timesteps    | 972000       |
| value_loss         | 0.0077546723 |
-------------------------------------
--------------------------------------
| approxkl           | 0.003370833   |
| clipfrac           | 0.03866667    |
| explained_variance | 0.927         |
| fps                | 1630          |
| n_updates          | 325           |
| policy_entropy     | 1.002413      |
| policy_loss        | -0.0045504468 |
| serial_timesteps   | 975000        |
| time_elapsed       | 591           |
| total_timesteps    | 975000        |
| value_loss         | 0.008032849   |
--------------------------------------

--------------------------------------
| approxkl           | 0.003437804   |
| clipfrac           | 0.034166664   |
| explained_variance | 0.933         |
| fps                | 1503          |
| n_updates          | 341           |
| policy_entropy     | 0.9179753     |
| policy_loss        | -0.0037866947 |
| serial_timesteps   | 1023000       |
| time_elapsed       | 621           |
| total_timesteps    | 1023000       |
| value_loss         | 0.0071789287  |
--------------------------------------
-------------------------------------
| approxkl           | 0.001469707  |
| clipfrac           | 0.01375      |
| explained_variance | 0.903        |
| fps                | 1600         |
| n_updates          | 342          |
| policy_entropy     | 0.8593023    |
| policy_loss        | -0.003292668 |
| serial_timesteps   | 1026000      |
| time_elapsed       | 623          |
| total_timesteps    | 1026000      |
| value_loss         | 0.007926059  |
-------------------------------------

--------------------------------------
| approxkl           | 0.00074558635 |
| clipfrac           | 0.0063333334  |
| explained_variance | 0.931         |
| fps                | 181           |
| n_updates          | 358           |
| policy_entropy     | 0.70144165    |
| policy_loss        | -0.0017466263 |
| serial_timesteps   | 1074000       |
| time_elapsed       | 2.55e+03      |
| total_timesteps    | 1074000       |
| value_loss         | 0.007121216   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012681411  |
| clipfrac           | 0.010333333   |
| explained_variance | 0.929         |
| fps                | 238           |
| n_updates          | 359           |
| policy_entropy     | 0.7041118     |
| policy_loss        | -0.0024644856 |
| serial_timesteps   | 1077000       |
| time_elapsed       | 2.57e+03      |
| total_timesteps    | 1077000       |
| value_loss         | 0.0070583858  |
-------------------------

--------------------------------------
| approxkl           | 0.0007051459  |
| clipfrac           | 0.0055833333  |
| explained_variance | 0.896         |
| fps                | 1388          |
| n_updates          | 375           |
| policy_entropy     | 0.73321736    |
| policy_loss        | -0.0016899789 |
| serial_timesteps   | 1125000       |
| time_elapsed       | 2.62e+03      |
| total_timesteps    | 1125000       |
| value_loss         | 0.008028486   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008604425  |
| clipfrac           | 0.01025       |
| explained_variance | 0.901         |
| fps                | 1415          |
| n_updates          | 376           |
| policy_entropy     | 0.68523055    |
| policy_loss        | -0.0024414863 |
| serial_timesteps   | 1128000       |
| time_elapsed       | 2.62e+03      |
| total_timesteps    | 1128000       |
| value_loss         | 0.0077595618  |
-------------------------

--------------------------------------
| approxkl           | 0.0010987737  |
| clipfrac           | 0.010083334   |
| explained_variance | 0.92          |
| fps                | 1301          |
| n_updates          | 392           |
| policy_entropy     | 0.707146      |
| policy_loss        | -0.0026744194 |
| serial_timesteps   | 1176000       |
| time_elapsed       | 2.66e+03      |
| total_timesteps    | 1176000       |
| value_loss         | 0.0077261217  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006213876  |
| clipfrac           | 0.0024166668  |
| explained_variance | 0.863         |
| fps                | 1342          |
| n_updates          | 393           |
| policy_entropy     | 0.7193021     |
| policy_loss        | -0.0011185887 |
| serial_timesteps   | 1179000       |
| time_elapsed       | 2.66e+03      |
| total_timesteps    | 1179000       |
| value_loss         | 0.007803745   |
-------------------------

--------------------------------------
| approxkl           | 0.0023910583  |
| clipfrac           | 0.032333333   |
| explained_variance | 0.849         |
| fps                | 1264          |
| n_updates          | 409           |
| policy_entropy     | 0.8360789     |
| policy_loss        | -0.0028951052 |
| serial_timesteps   | 1227000       |
| time_elapsed       | 2.7e+03       |
| total_timesteps    | 1227000       |
| value_loss         | 0.008391132   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00090356276 |
| clipfrac           | 0.00825       |
| explained_variance | 0.902         |
| fps                | 1267          |
| n_updates          | 410           |
| policy_entropy     | 0.71667814    |
| policy_loss        | -0.002027262  |
| serial_timesteps   | 1230000       |
| time_elapsed       | 2.7e+03       |
| total_timesteps    | 1230000       |
| value_loss         | 0.00774807    |
-------------------------

--------------------------------------
| approxkl           | 0.0010281397  |
| clipfrac           | 0.006666667   |
| explained_variance | 0.923         |
| fps                | 1299          |
| n_updates          | 426           |
| policy_entropy     | 0.7629998     |
| policy_loss        | -0.0034608792 |
| serial_timesteps   | 1278000       |
| time_elapsed       | 2.74e+03      |
| total_timesteps    | 1278000       |
| value_loss         | 0.0082591735  |
--------------------------------------
--------------------------------------
| approxkl           | 0.001384798   |
| clipfrac           | 0.009416666   |
| explained_variance | 0.926         |
| fps                | 1332          |
| n_updates          | 427           |
| policy_entropy     | 0.63696134    |
| policy_loss        | -0.0023450828 |
| serial_timesteps   | 1281000       |
| time_elapsed       | 2.74e+03      |
| total_timesteps    | 1281000       |
| value_loss         | 0.0077674966  |
-------------------------

--------------------------------------
| approxkl           | 0.0013131186  |
| clipfrac           | 0.015000001   |
| explained_variance | 0.914         |
| fps                | 1196          |
| n_updates          | 443           |
| policy_entropy     | 0.6635433     |
| policy_loss        | -0.0025043278 |
| serial_timesteps   | 1329000       |
| time_elapsed       | 2.77e+03      |
| total_timesteps    | 1329000       |
| value_loss         | 0.007940413   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022042792  |
| clipfrac           | 0.024666665   |
| explained_variance | 0.898         |
| fps                | 1208          |
| n_updates          | 444           |
| policy_entropy     | 0.6250087     |
| policy_loss        | -0.0025153246 |
| serial_timesteps   | 1332000       |
| time_elapsed       | 2.78e+03      |
| total_timesteps    | 1332000       |
| value_loss         | 0.007622486   |
-------------------------

-------------------------------------
| approxkl           | 0.002884943  |
| clipfrac           | 0.042083338  |
| explained_variance | 0.887        |
| fps                | 1373         |
| n_updates          | 460          |
| policy_entropy     | 0.7455259    |
| policy_loss        | -0.004113723 |
| serial_timesteps   | 1380000      |
| time_elapsed       | 2.81e+03     |
| total_timesteps    | 1380000      |
| value_loss         | 0.009202384  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0027204966 |
| clipfrac           | 0.036833335  |
| explained_variance | 0.899        |
| fps                | 1358         |
| n_updates          | 461          |
| policy_entropy     | 0.6381407    |
| policy_loss        | -0.003827436 |
| serial_timesteps   | 1383000      |
| time_elapsed       | 2.81e+03     |
| total_timesteps    | 1383000      |
| value_loss         | 0.007426709  |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0018248906  |
| clipfrac           | 0.021083334   |
| explained_variance | 0.877         |
| fps                | 1308          |
| n_updates          | 477           |
| policy_entropy     | 0.6208309     |
| policy_loss        | -0.0028473283 |
| serial_timesteps   | 1431000       |
| time_elapsed       | 2.85e+03      |
| total_timesteps    | 1431000       |
| value_loss         | 0.008114936   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0027747478  |
| clipfrac           | 0.028250003   |
| explained_variance | 0.909         |
| fps                | 1308          |
| n_updates          | 478           |
| policy_entropy     | 0.83424926    |
| policy_loss        | -0.0018354466 |
| serial_timesteps   | 1434000       |
| time_elapsed       | 2.85e+03      |
| total_timesteps    | 1434000       |
| value_loss         | 0.00769574    |
-------------------------

--------------------------------------
| approxkl           | 0.0019436013  |
| clipfrac           | 0.01925       |
| explained_variance | 0.892         |
| fps                | 1262          |
| n_updates          | 494           |
| policy_entropy     | 0.71819276    |
| policy_loss        | -0.0036091516 |
| serial_timesteps   | 1482000       |
| time_elapsed       | 2.89e+03      |
| total_timesteps    | 1482000       |
| value_loss         | 0.008547031   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0034861586 |
| clipfrac           | 0.051000003  |
| explained_variance | 0.939        |
| fps                | 1262         |
| n_updates          | 495          |
| policy_entropy     | 0.64754236   |
| policy_loss        | -0.004680701 |
| serial_timesteps   | 1485000      |
| time_elapsed       | 2.89e+03     |
| total_timesteps    | 1485000      |
| value_loss         | 0.007319711  |
-------------------------------------

--------------------------------------
| approxkl           | 0.000900057   |
| clipfrac           | 0.008166667   |
| explained_variance | 0.842         |
| fps                | 1274          |
| n_updates          | 511           |
| policy_entropy     | 0.7762404     |
| policy_loss        | -0.0017496388 |
| serial_timesteps   | 1533000       |
| time_elapsed       | 2.93e+03      |
| total_timesteps    | 1533000       |
| value_loss         | 0.008200356   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018496751  |
| clipfrac           | 0.017666668   |
| explained_variance | 0.886         |
| fps                | 1278          |
| n_updates          | 512           |
| policy_entropy     | 0.68358684    |
| policy_loss        | -0.0029612393 |
| serial_timesteps   | 1536000       |
| time_elapsed       | 2.93e+03      |
| total_timesteps    | 1536000       |
| value_loss         | 0.008581215   |
-------------------------

--------------------------------------
| approxkl           | 0.0014117954  |
| clipfrac           | 0.015749998   |
| explained_variance | 0.884         |
| fps                | 1374          |
| n_updates          | 528           |
| policy_entropy     | 0.675297      |
| policy_loss        | -0.0021766145 |
| serial_timesteps   | 1584000       |
| time_elapsed       | 2.97e+03      |
| total_timesteps    | 1584000       |
| value_loss         | 0.008222062   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012301424  |
| clipfrac           | 0.017083334   |
| explained_variance | 0.901         |
| fps                | 1374          |
| n_updates          | 529           |
| policy_entropy     | 0.536821      |
| policy_loss        | -0.0029134245 |
| serial_timesteps   | 1587000       |
| time_elapsed       | 2.97e+03      |
| total_timesteps    | 1587000       |
| value_loss         | 0.007738298   |
-------------------------

--------------------------------------
| approxkl           | 0.0026171496  |
| clipfrac           | 0.032083333   |
| explained_variance | 0.921         |
| fps                | 1261          |
| n_updates          | 545           |
| policy_entropy     | 0.8556556     |
| policy_loss        | -0.0037830183 |
| serial_timesteps   | 1635000       |
| time_elapsed       | 3.01e+03      |
| total_timesteps    | 1635000       |
| value_loss         | 0.007739505   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015123343  |
| clipfrac           | 0.017916668   |
| explained_variance | 0.861         |
| fps                | 1267          |
| n_updates          | 546           |
| policy_entropy     | 0.65907043    |
| policy_loss        | -0.0015510018 |
| serial_timesteps   | 1638000       |
| time_elapsed       | 3.01e+03      |
| total_timesteps    | 1638000       |
| value_loss         | 0.008407417   |
-------------------------

--------------------------------------
| approxkl           | 0.00232169    |
| clipfrac           | 0.021083333   |
| explained_variance | 0.92          |
| fps                | 1356          |
| n_updates          | 562           |
| policy_entropy     | 0.6241162     |
| policy_loss        | -0.0039078966 |
| serial_timesteps   | 1686000       |
| time_elapsed       | 3.04e+03      |
| total_timesteps    | 1686000       |
| value_loss         | 0.008660954   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014877634  |
| clipfrac           | 0.010666667   |
| explained_variance | 0.862         |
| fps                | 1332          |
| n_updates          | 563           |
| policy_entropy     | 0.88201475    |
| policy_loss        | -0.0013537739 |
| serial_timesteps   | 1689000       |
| time_elapsed       | 3.05e+03      |
| total_timesteps    | 1689000       |
| value_loss         | 0.008443274   |
-------------------------

--------------------------------------
| approxkl           | 0.0013215179  |
| clipfrac           | 0.011333333   |
| explained_variance | 0.856         |
| fps                | 1271          |
| n_updates          | 579           |
| policy_entropy     | 0.64684826    |
| policy_loss        | -0.0021526744 |
| serial_timesteps   | 1737000       |
| time_elapsed       | 3.08e+03      |
| total_timesteps    | 1737000       |
| value_loss         | 0.0083164815  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009804199  |
| clipfrac           | 0.00825       |
| explained_variance | 0.909         |
| fps                | 1279          |
| n_updates          | 580           |
| policy_entropy     | 0.5071031     |
| policy_loss        | -0.0023508098 |
| serial_timesteps   | 1740000       |
| time_elapsed       | 3.09e+03      |
| total_timesteps    | 1740000       |
| value_loss         | 0.007907067   |
-------------------------

-------------------------------------
| approxkl           | 0.0027956008 |
| clipfrac           | 0.031166669  |
| explained_variance | 0.927        |
| fps                | 1296         |
| n_updates          | 596          |
| policy_entropy     | 0.8842895    |
| policy_loss        | -0.003884429 |
| serial_timesteps   | 1788000      |
| time_elapsed       | 3.12e+03     |
| total_timesteps    | 1788000      |
| value_loss         | 0.0075754584 |
-------------------------------------
-------------------------------------
| approxkl           | 0.002530964  |
| clipfrac           | 0.0315       |
| explained_variance | 0.927        |
| fps                | 1307         |
| n_updates          | 597          |
| policy_entropy     | 0.60739154   |
| policy_loss        | -0.004354137 |
| serial_timesteps   | 1791000      |
| time_elapsed       | 3.12e+03     |
| total_timesteps    | 1791000      |
| value_loss         | 0.007560148  |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0016580848  |
| clipfrac           | 0.013833334   |
| explained_variance | 0.915         |
| fps                | 1363          |
| n_updates          | 613           |
| policy_entropy     | 0.71575737    |
| policy_loss        | -0.0030773624 |
| serial_timesteps   | 1839000       |
| time_elapsed       | 3.16e+03      |
| total_timesteps    | 1839000       |
| value_loss         | 0.0076527223  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015855661  |
| clipfrac           | 0.01375       |
| explained_variance | 0.88          |
| fps                | 1308          |
| n_updates          | 614           |
| policy_entropy     | 0.6594837     |
| policy_loss        | -0.0027353654 |
| serial_timesteps   | 1842000       |
| time_elapsed       | 3.16e+03      |
| total_timesteps    | 1842000       |
| value_loss         | 0.009420846   |
-------------------------

-------------------------------------
| approxkl           | 0.0012374133 |
| clipfrac           | 0.011        |
| explained_variance | 0.947        |
| fps                | 1341         |
| n_updates          | 630          |
| policy_entropy     | 0.6252063    |
| policy_loss        | -0.002537794 |
| serial_timesteps   | 1890000      |
| time_elapsed       | 3.2e+03      |
| total_timesteps    | 1890000      |
| value_loss         | 0.0075543774 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0011015463  |
| clipfrac           | 0.008166667   |
| explained_variance | 0.869         |
| fps                | 1372          |
| n_updates          | 631           |
| policy_entropy     | 0.59852284    |
| policy_loss        | -0.0057563856 |
| serial_timesteps   | 1893000       |
| time_elapsed       | 3.2e+03       |
| total_timesteps    | 1893000       |
| value_loss         | 0.01089347    |
--------------------------------------

--------------------------------------
| approxkl           | 0.0009237578  |
| clipfrac           | 0.009333333   |
| explained_variance | 0.877         |
| fps                | 1251          |
| n_updates          | 647           |
| policy_entropy     | 0.6196615     |
| policy_loss        | -0.0019365398 |
| serial_timesteps   | 1941000       |
| time_elapsed       | 3.24e+03      |
| total_timesteps    | 1941000       |
| value_loss         | 0.008371649   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009185894  |
| clipfrac           | 0.005416666   |
| explained_variance | 0.797         |
| fps                | 1236          |
| n_updates          | 648           |
| policy_entropy     | 0.91747165    |
| policy_loss        | -0.0013356651 |
| serial_timesteps   | 1944000       |
| time_elapsed       | 3.24e+03      |
| total_timesteps    | 1944000       |
| value_loss         | 0.008328097   |
-------------------------

--------------------------------------
| approxkl           | 0.0004283349  |
| clipfrac           | 0.005         |
| explained_variance | 0.901         |
| fps                | 1378          |
| n_updates          | 664           |
| policy_entropy     | 0.619521      |
| policy_loss        | -0.0013153235 |
| serial_timesteps   | 1992000       |
| time_elapsed       | 3.28e+03      |
| total_timesteps    | 1992000       |
| value_loss         | 0.00788715    |
--------------------------------------
--------------------------------------
| approxkl           | 0.000991507   |
| clipfrac           | 0.0050833337  |
| explained_variance | 0.898         |
| fps                | 1318          |
| n_updates          | 665           |
| policy_entropy     | 0.8690548     |
| policy_loss        | -0.0018928023 |
| serial_timesteps   | 1995000       |
| time_elapsed       | 3.28e+03      |
| total_timesteps    | 1995000       |
| value_loss         | 0.008527742   |
-------------------------

--------------------------------------
| approxkl           | 0.0015601102  |
| clipfrac           | 0.015666667   |
| explained_variance | 0.948         |
| fps                | 1308          |
| n_updates          | 681           |
| policy_entropy     | 0.7551438     |
| policy_loss        | -0.0017464823 |
| serial_timesteps   | 2043000       |
| time_elapsed       | 3.32e+03      |
| total_timesteps    | 2043000       |
| value_loss         | 0.0068433397  |
--------------------------------------
-------------------------------------
| approxkl           | 0.002291143  |
| clipfrac           | 0.024999999  |
| explained_variance | 0.899        |
| fps                | 1330         |
| n_updates          | 682          |
| policy_entropy     | 0.93380165   |
| policy_loss        | -0.002186955 |
| serial_timesteps   | 2046000      |
| time_elapsed       | 3.32e+03     |
| total_timesteps    | 2046000      |
| value_loss         | 0.007388516  |
-------------------------------------

-------------------------------------
| approxkl           | 0.0017004758 |
| clipfrac           | 0.019833334  |
| explained_variance | 0.9          |
| fps                | 1205         |
| n_updates          | 698          |
| policy_entropy     | 0.7629169    |
| policy_loss        | -0.002188155 |
| serial_timesteps   | 2094000      |
| time_elapsed       | 3.36e+03     |
| total_timesteps    | 2094000      |
| value_loss         | 0.008074803  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0019927921 |
| clipfrac           | 0.021166667  |
| explained_variance | 0.883        |
| fps                | 1155         |
| n_updates          | 699          |
| policy_entropy     | 0.78796494   |
| policy_loss        | -0.003091456 |
| serial_timesteps   | 2097000      |
| time_elapsed       | 3.36e+03     |
| total_timesteps    | 2097000      |
| value_loss         | 0.008315852  |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0020346215  |
| clipfrac           | 0.026583334   |
| explained_variance | 0.913         |
| fps                | 1147          |
| n_updates          | 715           |
| policy_entropy     | 0.6562716     |
| policy_loss        | -0.0011493878 |
| serial_timesteps   | 2145000       |
| time_elapsed       | 3.4e+03       |
| total_timesteps    | 2145000       |
| value_loss         | 0.00771578    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00091073976 |
| clipfrac           | 0.0145000005  |
| explained_variance | 0.835         |
| fps                | 1154          |
| n_updates          | 716           |
| policy_entropy     | 0.64554894    |
| policy_loss        | -0.0015669243 |
| serial_timesteps   | 2148000       |
| time_elapsed       | 3.4e+03       |
| total_timesteps    | 2148000       |
| value_loss         | 0.008486984   |
-------------------------

-------------------------------------
| approxkl           | 0.0005138508 |
| clipfrac           | 0.00175      |
| explained_variance | 0.901        |
| fps                | 1063         |
| n_updates          | 732          |
| policy_entropy     | 0.6460943    |
| policy_loss        | -0.001030592 |
| serial_timesteps   | 2196000      |
| time_elapsed       | 3.45e+03     |
| total_timesteps    | 2196000      |
| value_loss         | 0.007820922  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0006786899  |
| clipfrac           | 0.00125       |
| explained_variance | 0.838         |
| fps                | 1062          |
| n_updates          | 733           |
| policy_entropy     | 0.7541129     |
| policy_loss        | -0.0020341985 |
| serial_timesteps   | 2199000       |
| time_elapsed       | 3.45e+03      |
| total_timesteps    | 2199000       |
| value_loss         | 0.008441813   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0018850663  |
| clipfrac           | 0.013583333   |
| explained_variance | 0.933         |
| fps                | 1138          |
| n_updates          | 749           |
| policy_entropy     | 0.54406625    |
| policy_loss        | -0.0026415735 |
| serial_timesteps   | 2247000       |
| time_elapsed       | 3.49e+03      |
| total_timesteps    | 2247000       |
| value_loss         | 0.008068293   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0004894913  |
| clipfrac           | 0.0035        |
| explained_variance | 0.901         |
| fps                | 1109          |
| n_updates          | 750           |
| policy_entropy     | 0.63093203    |
| policy_loss        | -0.0013491584 |
| serial_timesteps   | 2250000       |
| time_elapsed       | 3.5e+03       |
| total_timesteps    | 2250000       |
| value_loss         | 0.0078341     |
-------------------------

--------------------------------------
| approxkl           | 0.00031183386 |
| clipfrac           | 0.0           |
| explained_variance | 0.907         |
| fps                | 1095          |
| n_updates          | 766           |
| policy_entropy     | 0.52983856    |
| policy_loss        | -0.0012489439 |
| serial_timesteps   | 2298000       |
| time_elapsed       | 3.54e+03      |
| total_timesteps    | 2298000       |
| value_loss         | 0.007894573   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0018371651 |
| clipfrac           | 0.026833331  |
| explained_variance | 0.812        |
| fps                | 1010         |
| n_updates          | 767          |
| policy_entropy     | 0.6098858    |
| policy_loss        | -0.002585005 |
| serial_timesteps   | 2301000      |
| time_elapsed       | 3.54e+03     |
| total_timesteps    | 2301000      |
| value_loss         | 0.008100141  |
-------------------------------------

--------------------------------------
| approxkl           | 0.002681435   |
| clipfrac           | 0.031583335   |
| explained_variance | 0.889         |
| fps                | 1108          |
| n_updates          | 783           |
| policy_entropy     | 0.6876602     |
| policy_loss        | -0.0024713522 |
| serial_timesteps   | 2349000       |
| time_elapsed       | 3.59e+03      |
| total_timesteps    | 2349000       |
| value_loss         | 0.008012785   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0021227354  |
| clipfrac           | 0.036083333   |
| explained_variance | 0.853         |
| fps                | 1112          |
| n_updates          | 784           |
| policy_entropy     | 0.72679937    |
| policy_loss        | -0.0026699658 |
| serial_timesteps   | 2352000       |
| time_elapsed       | 3.59e+03      |
| total_timesteps    | 2352000       |
| value_loss         | 0.0083809355  |
-------------------------

--------------------------------------
| approxkl           | 0.00052001135 |
| clipfrac           | 0.0035        |
| explained_variance | 0.822         |
| fps                | 1061          |
| n_updates          | 800           |
| policy_entropy     | 0.59272027    |
| policy_loss        | -0.0017187886 |
| serial_timesteps   | 2400000       |
| time_elapsed       | 3.63e+03      |
| total_timesteps    | 2400000       |
| value_loss         | 0.008394748   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0021956908 |
| clipfrac           | 0.017083332  |
| explained_variance | 0.896        |
| fps                | 1058         |
| n_updates          | 801          |
| policy_entropy     | 0.6196223    |
| policy_loss        | -0.003287138 |
| serial_timesteps   | 2403000      |
| time_elapsed       | 3.64e+03     |
| total_timesteps    | 2403000      |
| value_loss         | 0.009149089  |
-------------------------------------

--------------------------------------
| approxkl           | 0.001432171   |
| clipfrac           | 0.016         |
| explained_variance | 0.891         |
| fps                | 1055          |
| n_updates          | 817           |
| policy_entropy     | 0.8891194     |
| policy_loss        | -0.0022162125 |
| serial_timesteps   | 2451000       |
| time_elapsed       | 3.68e+03      |
| total_timesteps    | 2451000       |
| value_loss         | 0.008533759   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010715742  |
| clipfrac           | 0.0076666665  |
| explained_variance | 0.929         |
| fps                | 1069          |
| n_updates          | 818           |
| policy_entropy     | 0.7833637     |
| policy_loss        | -0.0035982986 |
| serial_timesteps   | 2454000       |
| time_elapsed       | 3.68e+03      |
| total_timesteps    | 2454000       |
| value_loss         | 0.007292841   |
-------------------------

--------------------------------------
| approxkl           | 0.0014923724  |
| clipfrac           | 0.011083334   |
| explained_variance | 0.893         |
| fps                | 1094          |
| n_updates          | 834           |
| policy_entropy     | 0.8693087     |
| policy_loss        | -0.0037228053 |
| serial_timesteps   | 2502000       |
| time_elapsed       | 3.73e+03      |
| total_timesteps    | 2502000       |
| value_loss         | 0.007848307   |
--------------------------------------
--------------------------------------
| approxkl           | 0.001546814   |
| clipfrac           | 0.012083333   |
| explained_variance | 0.921         |
| fps                | 1097          |
| n_updates          | 835           |
| policy_entropy     | 0.85992104    |
| policy_loss        | -0.0024532974 |
| serial_timesteps   | 2505000       |
| time_elapsed       | 3.73e+03      |
| total_timesteps    | 2505000       |
| value_loss         | 0.0076589556  |
-------------------------

--------------------------------------
| approxkl           | 0.0021131118  |
| clipfrac           | 0.023249999   |
| explained_variance | 0.925         |
| fps                | 1139          |
| n_updates          | 851           |
| policy_entropy     | 0.8693306     |
| policy_loss        | -0.0054936092 |
| serial_timesteps   | 2553000       |
| time_elapsed       | 3.77e+03      |
| total_timesteps    | 2553000       |
| value_loss         | 0.0072341035  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016473029  |
| clipfrac           | 0.01825       |
| explained_variance | 0.929         |
| fps                | 1137          |
| n_updates          | 852           |
| policy_entropy     | 0.85721076    |
| policy_loss        | -0.0015938758 |
| serial_timesteps   | 2556000       |
| time_elapsed       | 3.78e+03      |
| total_timesteps    | 2556000       |
| value_loss         | 0.007922322   |
-------------------------

-------------------------------------
| approxkl           | 0.0015310382 |
| clipfrac           | 0.011        |
| explained_variance | 0.792        |
| fps                | 1088         |
| n_updates          | 868          |
| policy_entropy     | 0.9408133    |
| policy_loss        | -0.003768099 |
| serial_timesteps   | 2604000      |
| time_elapsed       | 3.82e+03     |
| total_timesteps    | 2604000      |
| value_loss         | 0.0083171595 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0024069156  |
| clipfrac           | 0.024416668   |
| explained_variance | 0.834         |
| fps                | 1081          |
| n_updates          | 869           |
| policy_entropy     | 0.8456158     |
| policy_loss        | -0.0030856852 |
| serial_timesteps   | 2607000       |
| time_elapsed       | 3.82e+03      |
| total_timesteps    | 2607000       |
| value_loss         | 0.008651206   |
--------------------------------------

--------------------------------------
| approxkl           | 0.003111777   |
| clipfrac           | 0.038833335   |
| explained_variance | 0.926         |
| fps                | 1039          |
| n_updates          | 885           |
| policy_entropy     | 0.9046453     |
| policy_loss        | -0.0031061103 |
| serial_timesteps   | 2655000       |
| time_elapsed       | 3.87e+03      |
| total_timesteps    | 2655000       |
| value_loss         | 0.0072849556  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0029645532 |
| clipfrac           | 0.036166664  |
| explained_variance | 0.914        |
| fps                | 1039         |
| n_updates          | 886          |
| policy_entropy     | 0.95072144   |
| policy_loss        | -0.003267442 |
| serial_timesteps   | 2658000      |
| time_elapsed       | 3.87e+03     |
| total_timesteps    | 2658000      |
| value_loss         | 0.007405705  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0013772846  |
| clipfrac           | 0.015833333   |
| explained_variance | 0.891         |
| fps                | 969           |
| n_updates          | 902           |
| policy_entropy     | 0.7522278     |
| policy_loss        | -0.0038796756 |
| serial_timesteps   | 2706000       |
| time_elapsed       | 3.94e+03      |
| total_timesteps    | 2706000       |
| value_loss         | 0.008310184   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018683011  |
| clipfrac           | 0.016666666   |
| explained_variance | 0.904         |
| fps                | 1020          |
| n_updates          | 903           |
| policy_entropy     | 0.8400625     |
| policy_loss        | -0.0042528324 |
| serial_timesteps   | 2709000       |
| time_elapsed       | 3.95e+03      |
| total_timesteps    | 2709000       |
| value_loss         | 0.008193468   |
-------------------------

-------------------------------------
| approxkl           | 0.0026124152 |
| clipfrac           | 0.0255       |
| explained_variance | 0.889        |
| fps                | 448          |
| n_updates          | 919          |
| policy_entropy     | 0.90242106   |
| policy_loss        | -0.003804793 |
| serial_timesteps   | 2757000      |
| time_elapsed       | 4.01e+03     |
| total_timesteps    | 2757000      |
| value_loss         | 0.007875697  |
-------------------------------------
--------------------------------------
| approxkl           | 0.003737328   |
| clipfrac           | 0.051749997   |
| explained_variance | 0.929         |
| fps                | 491           |
| n_updates          | 920           |
| policy_entropy     | 0.8017529     |
| policy_loss        | -0.0038728167 |
| serial_timesteps   | 2760000       |
| time_elapsed       | 4.01e+03      |
| total_timesteps    | 2760000       |
| value_loss         | 0.007915933   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0008423168  |
| clipfrac           | 0.0045        |
| explained_variance | 0.894         |
| fps                | 750           |
| n_updates          | 936           |
| policy_entropy     | 0.7880427     |
| policy_loss        | -0.0016437749 |
| serial_timesteps   | 2808000       |
| time_elapsed       | 4.07e+03      |
| total_timesteps    | 2808000       |
| value_loss         | 0.008396969   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0033355893  |
| clipfrac           | 0.037666667   |
| explained_variance | 0.908         |
| fps                | 659           |
| n_updates          | 937           |
| policy_entropy     | 0.7267246     |
| policy_loss        | -0.0054900083 |
| serial_timesteps   | 2811000       |
| time_elapsed       | 4.08e+03      |
| total_timesteps    | 2811000       |
| value_loss         | 0.007875252   |
-------------------------

-------------------------------------
| approxkl           | 0.002476186  |
| clipfrac           | 0.029000001  |
| explained_variance | 0.891        |
| fps                | 998          |
| n_updates          | 953          |
| policy_entropy     | 0.8026706    |
| policy_loss        | -0.003938652 |
| serial_timesteps   | 2859000      |
| time_elapsed       | 4.16e+03     |
| total_timesteps    | 2859000      |
| value_loss         | 0.0075562163 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0011029656  |
| clipfrac           | 0.0069166664  |
| explained_variance | 0.919         |
| fps                | 962           |
| n_updates          | 954           |
| policy_entropy     | 0.8779181     |
| policy_loss        | -0.0014895431 |
| serial_timesteps   | 2862000       |
| time_elapsed       | 4.16e+03      |
| total_timesteps    | 2862000       |
| value_loss         | 0.0085504055  |
--------------------------------------

--------------------------------------
| approxkl           | 0.0029775288  |
| clipfrac           | 0.038583335   |
| explained_variance | 0.888         |
| fps                | 615           |
| n_updates          | 970           |
| policy_entropy     | 0.87254685    |
| policy_loss        | -0.0028761474 |
| serial_timesteps   | 2910000       |
| time_elapsed       | 4.24e+03      |
| total_timesteps    | 2910000       |
| value_loss         | 0.008720906   |
--------------------------------------
--------------------------------------
| approxkl           | 0.001944902   |
| clipfrac           | 0.024083335   |
| explained_variance | 0.917         |
| fps                | 703           |
| n_updates          | 971           |
| policy_entropy     | 0.7902935     |
| policy_loss        | -0.0022910712 |
| serial_timesteps   | 2913000       |
| time_elapsed       | 4.25e+03      |
| total_timesteps    | 2913000       |
| value_loss         | 0.007869865   |
-------------------------

-------------------------------------
| approxkl           | 0.0022074345 |
| clipfrac           | 0.020333333  |
| explained_variance | 0.922        |
| fps                | 422          |
| n_updates          | 987          |
| policy_entropy     | 0.87946177   |
| policy_loss        | -0.003064996 |
| serial_timesteps   | 2961000      |
| time_elapsed       | 4.32e+03     |
| total_timesteps    | 2961000      |
| value_loss         | 0.0074090343 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0014887179  |
| clipfrac           | 0.011583334   |
| explained_variance | 0.898         |
| fps                | 0             |
| n_updates          | 988           |
| policy_entropy     | 0.9974217     |
| policy_loss        | -0.0035741436 |
| serial_timesteps   | 2964000       |
| time_elapsed       | 4.32e+03      |
| total_timesteps    | 2964000       |
| value_loss         | 0.0076368814  |
--------------------------------------

--------------------------------------
| approxkl           | 0.0010092163  |
| clipfrac           | 0.00875       |
| explained_variance | 0.899         |
| fps                | 1598          |
| n_updates          | 1004          |
| policy_entropy     | 0.91352695    |
| policy_loss        | -0.0015673225 |
| serial_timesteps   | 3012000       |
| time_elapsed       | 7.65e+03      |
| total_timesteps    | 3012000       |
| value_loss         | 0.007491856   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0020623065  |
| clipfrac           | 0.02275       |
| explained_variance | 0.902         |
| fps                | 1395          |
| n_updates          | 1005          |
| policy_entropy     | 0.8689799     |
| policy_loss        | -0.0016744229 |
| serial_timesteps   | 3015000       |
| time_elapsed       | 7.65e+03      |
| total_timesteps    | 3015000       |
| value_loss         | 0.007750592   |
-------------------------

-------------------------------------
| approxkl           | 0.0025859703 |
| clipfrac           | 0.027833333  |
| explained_variance | 0.878        |
| fps                | 1428         |
| n_updates          | 1021         |
| policy_entropy     | 0.91231394   |
| policy_loss        | -0.003052162 |
| serial_timesteps   | 3063000      |
| time_elapsed       | 7.68e+03     |
| total_timesteps    | 3063000      |
| value_loss         | 0.008405728  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0032891012 |
| clipfrac           | 0.0315       |
| explained_variance | 0.904        |
| fps                | 1425         |
| n_updates          | 1022         |
| policy_entropy     | 0.9736867    |
| policy_loss        | -0.003867892 |
| serial_timesteps   | 3066000      |
| time_elapsed       | 7.68e+03     |
| total_timesteps    | 3066000      |
| value_loss         | 0.0074986466 |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0021636551  |
| clipfrac           | 0.024666667   |
| explained_variance | 0.854         |
| fps                | 1297          |
| n_updates          | 1038          |
| policy_entropy     | 0.8805156     |
| policy_loss        | -0.0028215442 |
| serial_timesteps   | 3114000       |
| time_elapsed       | 7.72e+03      |
| total_timesteps    | 3114000       |
| value_loss         | 0.00847963    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00057938905 |
| clipfrac           | 0.0018333333  |
| explained_variance | 0.914         |
| fps                | 1286          |
| n_updates          | 1039          |
| policy_entropy     | 0.80099386    |
| policy_loss        | -0.0022687372 |
| serial_timesteps   | 3117000       |
| time_elapsed       | 7.72e+03      |
| total_timesteps    | 3117000       |
| value_loss         | 0.008011477   |
-------------------------

--------------------------------------
| approxkl           | 0.0013990041  |
| clipfrac           | 0.017666666   |
| explained_variance | 0.925         |
| fps                | 1328          |
| n_updates          | 1055          |
| policy_entropy     | 0.85853213    |
| policy_loss        | -0.0031164081 |
| serial_timesteps   | 3165000       |
| time_elapsed       | 7.76e+03      |
| total_timesteps    | 3165000       |
| value_loss         | 0.007243646   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00063932454 |
| clipfrac           | 0.0019166667  |
| explained_variance | 0.925         |
| fps                | 1328          |
| n_updates          | 1056          |
| policy_entropy     | 0.8603822     |
| policy_loss        | -0.001971476  |
| serial_timesteps   | 3168000       |
| time_elapsed       | 7.76e+03      |
| total_timesteps    | 3168000       |
| value_loss         | 0.007630569   |
-------------------------

--------------------------------------
| approxkl           | 0.0018325449  |
| clipfrac           | 0.014833333   |
| explained_variance | 0.892         |
| fps                | 1394          |
| n_updates          | 1072          |
| policy_entropy     | 0.92221856    |
| policy_loss        | -0.0041245394 |
| serial_timesteps   | 3216000       |
| time_elapsed       | 7.79e+03      |
| total_timesteps    | 3216000       |
| value_loss         | 0.0077948975  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0039080847  |
| clipfrac           | 0.05341667    |
| explained_variance | 0.936         |
| fps                | 1360          |
| n_updates          | 1073          |
| policy_entropy     | 0.85023034    |
| policy_loss        | -0.0030763948 |
| serial_timesteps   | 3219000       |
| time_elapsed       | 7.8e+03       |
| total_timesteps    | 3219000       |
| value_loss         | 0.0075532403  |
-------------------------

--------------------------------------
| approxkl           | 0.0034306818  |
| clipfrac           | 0.04825       |
| explained_variance | 0.801         |
| fps                | 1426          |
| n_updates          | 1089          |
| policy_entropy     | 0.8760947     |
| policy_loss        | -0.0030729042 |
| serial_timesteps   | 3267000       |
| time_elapsed       | 7.83e+03      |
| total_timesteps    | 3267000       |
| value_loss         | 0.008431981   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0034327833 |
| clipfrac           | 0.053083334  |
| explained_variance | 0.869        |
| fps                | 1451         |
| n_updates          | 1090         |
| policy_entropy     | 0.9037028    |
| policy_loss        | -0.00294308  |
| serial_timesteps   | 3270000      |
| time_elapsed       | 7.83e+03     |
| total_timesteps    | 3270000      |
| value_loss         | 0.007525682  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0026259555  |
| clipfrac           | 0.036083333   |
| explained_variance | 0.819         |
| fps                | 1298          |
| n_updates          | 1106          |
| policy_entropy     | 0.9045336     |
| policy_loss        | -0.0037048878 |
| serial_timesteps   | 3318000       |
| time_elapsed       | 7.87e+03      |
| total_timesteps    | 3318000       |
| value_loss         | 0.008586037   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0040217643 |
| clipfrac           | 0.050666664  |
| explained_variance | 0.9          |
| fps                | 1282         |
| n_updates          | 1107         |
| policy_entropy     | 0.8845125    |
| policy_loss        | -0.003438195 |
| serial_timesteps   | 3321000      |
| time_elapsed       | 7.87e+03     |
| total_timesteps    | 3321000      |
| value_loss         | 0.007966382  |
-------------------------------------

--------------------------------------
| approxkl           | 0.001990062   |
| clipfrac           | 0.021333333   |
| explained_variance | 0.939         |
| fps                | 1332          |
| n_updates          | 1123          |
| policy_entropy     | 0.85794103    |
| policy_loss        | -0.0030961526 |
| serial_timesteps   | 3369000       |
| time_elapsed       | 7.91e+03      |
| total_timesteps    | 3369000       |
| value_loss         | 0.0077454173  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0018866834 |
| clipfrac           | 0.021        |
| explained_variance | 0.903        |
| fps                | 1346         |
| n_updates          | 1124         |
| policy_entropy     | 0.85914713   |
| policy_loss        | -0.002110885 |
| serial_timesteps   | 3372000      |
| time_elapsed       | 7.91e+03     |
| total_timesteps    | 3372000      |
| value_loss         | 0.007952373  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0006998662  |
| clipfrac           | 0.0044166665  |
| explained_variance | 0.867         |
| fps                | 1391          |
| n_updates          | 1140          |
| policy_entropy     | 0.7690001     |
| policy_loss        | -0.0020468363 |
| serial_timesteps   | 3420000       |
| time_elapsed       | 7.95e+03      |
| total_timesteps    | 3420000       |
| value_loss         | 0.008224362   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015913581  |
| clipfrac           | 0.023         |
| explained_variance | 0.81          |
| fps                | 1370          |
| n_updates          | 1141          |
| policy_entropy     | 0.81840456    |
| policy_loss        | -0.0022539261 |
| serial_timesteps   | 3423000       |
| time_elapsed       | 7.95e+03      |
| total_timesteps    | 3423000       |
| value_loss         | 0.008319388   |
-------------------------

--------------------------------------
| approxkl           | 0.00077673455 |
| clipfrac           | 0.0048333337  |
| explained_variance | 0.916         |
| fps                | 1383          |
| n_updates          | 1157          |
| policy_entropy     | 0.84093565    |
| policy_loss        | -0.0030920275 |
| serial_timesteps   | 3471000       |
| time_elapsed       | 7.98e+03      |
| total_timesteps    | 3471000       |
| value_loss         | 0.008574133   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0030098113  |
| clipfrac           | 0.039333336   |
| explained_variance | 0.919         |
| fps                | 1383          |
| n_updates          | 1158          |
| policy_entropy     | 0.8265868     |
| policy_loss        | -0.0046225414 |
| serial_timesteps   | 3474000       |
| time_elapsed       | 7.99e+03      |
| total_timesteps    | 3474000       |
| value_loss         | 0.007678579   |
-------------------------

--------------------------------------
| approxkl           | 0.0006393856  |
| clipfrac           | 0.0031666665  |
| explained_variance | 0.775         |
| fps                | 1312          |
| n_updates          | 1174          |
| policy_entropy     | 0.73948747    |
| policy_loss        | -0.0017047226 |
| serial_timesteps   | 3522000       |
| time_elapsed       | 8.02e+03      |
| total_timesteps    | 3522000       |
| value_loss         | 0.008600605   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0022204546 |
| clipfrac           | 0.03908333   |
| explained_variance | 0.941        |
| fps                | 1262         |
| n_updates          | 1175         |
| policy_entropy     | 0.7085553    |
| policy_loss        | -0.002731457 |
| serial_timesteps   | 3525000      |
| time_elapsed       | 8.02e+03     |
| total_timesteps    | 3525000      |
| value_loss         | 0.0070865825 |
-------------------------------------

-------------------------------------
| approxkl           | 0.0006027437 |
| clipfrac           | 0.005583334  |
| explained_variance | 0.937        |
| fps                | 1407         |
| n_updates          | 1191         |
| policy_entropy     | 0.59515214   |
| policy_loss        | -0.001758332 |
| serial_timesteps   | 3573000      |
| time_elapsed       | 8.06e+03     |
| total_timesteps    | 3573000      |
| value_loss         | 0.0069228644 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0011285358  |
| clipfrac           | 0.014583334   |
| explained_variance | 0.924         |
| fps                | 1391          |
| n_updates          | 1192          |
| policy_entropy     | 0.6571984     |
| policy_loss        | -0.0017425963 |
| serial_timesteps   | 3576000       |
| time_elapsed       | 8.06e+03      |
| total_timesteps    | 3576000       |
| value_loss         | 0.0069798054  |
--------------------------------------

--------------------------------------
| approxkl           | 0.0021753092  |
| clipfrac           | 0.04016667    |
| explained_variance | 0.876         |
| fps                | 1421          |
| n_updates          | 1208          |
| policy_entropy     | 0.63688046    |
| policy_loss        | -0.0023197697 |
| serial_timesteps   | 3624000       |
| time_elapsed       | 8.1e+03       |
| total_timesteps    | 3624000       |
| value_loss         | 0.008265042   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0002911836  |
| clipfrac           | 0.0           |
| explained_variance | 0.93          |
| fps                | 1404          |
| n_updates          | 1209          |
| policy_entropy     | 0.62885344    |
| policy_loss        | -0.0007105172 |
| serial_timesteps   | 3627000       |
| time_elapsed       | 8.1e+03       |
| total_timesteps    | 3627000       |
| value_loss         | 0.007388551   |
-------------------------

--------------------------------------
| approxkl           | 0.00037234073 |
| clipfrac           | 0.0026666669  |
| explained_variance | 0.839         |
| fps                | 1404          |
| n_updates          | 1225          |
| policy_entropy     | 0.57451886    |
| policy_loss        | -0.0018266944 |
| serial_timesteps   | 3675000       |
| time_elapsed       | 8.14e+03      |
| total_timesteps    | 3675000       |
| value_loss         | 0.008090158   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0011737759 |
| clipfrac           | 0.017916666  |
| explained_variance | 0.917        |
| fps                | 1400         |
| n_updates          | 1226         |
| policy_entropy     | 0.59786475   |
| policy_loss        | -0.003028235 |
| serial_timesteps   | 3678000      |
| time_elapsed       | 8.14e+03     |
| total_timesteps    | 3678000      |
| value_loss         | 0.007710149  |
-------------------------------------

--------------------------------------
| approxkl           | 0.001185754   |
| clipfrac           | 0.010166667   |
| explained_variance | 0.946         |
| fps                | 1283          |
| n_updates          | 1242          |
| policy_entropy     | 0.72340536    |
| policy_loss        | -0.0024137662 |
| serial_timesteps   | 3726000       |
| time_elapsed       | 8.17e+03      |
| total_timesteps    | 3726000       |
| value_loss         | 0.007116867   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0023067852  |
| clipfrac           | 0.027333334   |
| explained_variance | 0.902         |
| fps                | 1290          |
| n_updates          | 1243          |
| policy_entropy     | 0.7647033     |
| policy_loss        | -0.0023576878 |
| serial_timesteps   | 3729000       |
| time_elapsed       | 8.17e+03      |
| total_timesteps    | 3729000       |
| value_loss         | 0.008369757   |
-------------------------

--------------------------------------
| approxkl           | 0.0015867858  |
| clipfrac           | 0.016916666   |
| explained_variance | 0.912         |
| fps                | 1400          |
| n_updates          | 1259          |
| policy_entropy     | 0.7224007     |
| policy_loss        | -0.0027339878 |
| serial_timesteps   | 3777000       |
| time_elapsed       | 8.21e+03      |
| total_timesteps    | 3777000       |
| value_loss         | 0.0075591025  |
--------------------------------------
--------------------------------------
| approxkl           | 0.002745967   |
| clipfrac           | 0.043166667   |
| explained_variance | 0.848         |
| fps                | 1395          |
| n_updates          | 1260          |
| policy_entropy     | 0.80724245    |
| policy_loss        | -0.0039709173 |
| serial_timesteps   | 3780000       |
| time_elapsed       | 8.21e+03      |
| total_timesteps    | 3780000       |
| value_loss         | 0.0080998605  |
-------------------------

--------------------------------------
| approxkl           | 0.0007425808  |
| clipfrac           | 0.0070833336  |
| explained_variance | 0.862         |
| fps                | 1285          |
| n_updates          | 1276          |
| policy_entropy     | 0.68077874    |
| policy_loss        | -0.0012359559 |
| serial_timesteps   | 3828000       |
| time_elapsed       | 8.25e+03      |
| total_timesteps    | 3828000       |
| value_loss         | 0.0081814425  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00039470254 |
| clipfrac           | 0.0030833334  |
| explained_variance | 0.926         |
| fps                | 1303          |
| n_updates          | 1277          |
| policy_entropy     | 0.669392      |
| policy_loss        | -0.0017051109 |
| serial_timesteps   | 3831000       |
| time_elapsed       | 8.25e+03      |
| total_timesteps    | 3831000       |
| value_loss         | 0.00749816    |
-------------------------

-------------------------------------
| approxkl           | 0.002228837  |
| clipfrac           | 0.03125      |
| explained_variance | 0.887        |
| fps                | 1405         |
| n_updates          | 1293         |
| policy_entropy     | 0.8233677    |
| policy_loss        | -0.003524066 |
| serial_timesteps   | 3879000      |
| time_elapsed       | 8.29e+03     |
| total_timesteps    | 3879000      |
| value_loss         | 0.008832895  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0005979246  |
| clipfrac           | 0.0019166665  |
| explained_variance | 0.87          |
| fps                | 1344          |
| n_updates          | 1294          |
| policy_entropy     | 0.8347462     |
| policy_loss        | -0.0014280087 |
| serial_timesteps   | 3882000       |
| time_elapsed       | 8.29e+03      |
| total_timesteps    | 3882000       |
| value_loss         | 0.008522298   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0013914912  |
| clipfrac           | 0.00925       |
| explained_variance | 0.894         |
| fps                | 1346          |
| n_updates          | 1310          |
| policy_entropy     | 0.92076737    |
| policy_loss        | -0.0014953044 |
| serial_timesteps   | 3930000       |
| time_elapsed       | 8.32e+03      |
| total_timesteps    | 3930000       |
| value_loss         | 0.008023205   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022563906  |
| clipfrac           | 0.028833332   |
| explained_variance | 0.92          |
| fps                | 1299          |
| n_updates          | 1311          |
| policy_entropy     | 0.8803272     |
| policy_loss        | -0.0026474928 |
| serial_timesteps   | 3933000       |
| time_elapsed       | 8.33e+03      |
| total_timesteps    | 3933000       |
| value_loss         | 0.008032722   |
-------------------------

-------------------------------------
| approxkl           | 0.0018791371 |
| clipfrac           | 0.021583334  |
| explained_variance | 0.919        |
| fps                | 1388         |
| n_updates          | 1327         |
| policy_entropy     | 0.90923536   |
| policy_loss        | -0.003639622 |
| serial_timesteps   | 3981000      |
| time_elapsed       | 8.36e+03     |
| total_timesteps    | 3981000      |
| value_loss         | 0.007914837  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0020186717  |
| clipfrac           | 0.021083333   |
| explained_variance | 0.885         |
| fps                | 1396          |
| n_updates          | 1328          |
| policy_entropy     | 0.94026804    |
| policy_loss        | -0.0041077198 |
| serial_timesteps   | 3984000       |
| time_elapsed       | 8.36e+03      |
| total_timesteps    | 3984000       |
| value_loss         | 0.0077917427  |
--------------------------------------

In [25]:
# del model # remove to demonstrate saving and loading
# Reload the saved PPO2 policy from disk. No environment is attached to the
# model here: predict() only needs observations, so evaluation works without
# one. To continue training instead, attach an env with model.set_env(env).
model = PPO2.load(load_path="PPO2_MlpPolicy_lesson1_Stable_4M")

# Wrap the Pommerman env in a single-env VecEnv; the loop below indexes the
# batched step results with [0] accordingly.
n_cpu = 1
env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])

# Id looked up in info["winners"] to tell a win from a loss.
# NOTE(review): presumably the index of the learning agent in env_pom - confirm.
training_agent_id = 1
# Seconds to pause after every step so the rendered game can be followed.
render_delay = 0.1

# test the learned model
num_win = 0
num_tie = 0
num_lose = 0
total = 10 # number of playouts
for i_episode in range(total):
    obs = env.reset()
    done = False
    info = None
    while not done:
        env.render()
        action_training, _states = model.predict(obs)
        obs, rewards, dones, infos = env.step(action_training)
        # Only one env is wrapped, so take the first entry of each batch.
        done = dones[0]
        info = infos[0]
        time.sleep(render_delay)
    print('Episode {} finished'.format(i_episode))
    # `info` is the info dict of the step that ended the episode.
    result = info["result"]
    if result == constants.Result.Win:
        # Someone won: it is a win only if the tracked agent is among the winners.
        if training_agent_id in info["winners"]:
            num_win += 1
        else:
            num_lose += 1
    elif result == constants.Result.Tie:
        num_tie += 1
    # Any other result is not counted in the three tallies.
env.close()
print("Win ", num_win, "/", total, " games")
print("Tie ", num_tie, "/", total, " games")
print("Lose ", num_lose, "/", total, " games")


Loading a model without an environment, this model cannot be trained until it has a valid environment.
Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Win  0 / 10  games
Tie  5 / 10  games
Lose  5 / 10  games


# Export JSON or PNG file

In [None]:
# Create the directories that receive the recorded renders. When recording
# multiple episodes, each episode should get its own directory.

json_dir = './json_logs'
# exist_ok=True makes the call idempotent and avoids the check-then-create race.
os.makedirs(json_dir, exist_ok=True)
print('record json dir:', json_dir)

png_dir = './png_logs'
os.makedirs(png_dir, exist_ok=True)
print('record png dir:', png_dir)


# For each step of the environment: render with recording enabled so a PNG
# frame and a JSON state file are written into the directories above.
# NOTE(review): this uses `env` and `info` left over from the evaluation cell,
# where `env.close()` has already been called, and `env_id` from a cell not
# shown here - confirm they are valid before running.
env.render(record_pngs_dir=png_dir, record_json_dir=json_dir)

# Run this only at the end of an episode to gather the per-step JSON files.
# NOTE(review): the agent names and finished_at=0 look like placeholders - confirm.
pommerman.utility.join_json_state(json_dir, ['agent0name','agent1name','agent2name','agent3name'], finished_at=0, config=env_id, info=info)

# Stable Baselines example code (CartPole)

In [None]:
import gym

from stable_baselines.common.policies import MlpPolicy
# DummyVecEnv is imported here as well because this cell uses it; relying on an
# earlier cell's import would break the example when run on its own.
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines import PPO2

# Vectorized environment built from n_cpu CartPole instances wrapped in a
# DummyVecEnv.
n_cpu = 1
env = DummyVecEnv([lambda: gym.make('CartPole-v1') for i in range(n_cpu)])

# Train a PPO2 agent with an MLP policy and save it to disk.
model = PPO2(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=25000)
model.save("ppo2_cartpole")

del model # remove to demonstrate saving and loading

# Restore the agent from the file written above.
model = PPO2.load("ppo2_cartpole")

# Enjoy trained agent
obs = env.reset()



In [None]:
# Show the current observation, then the shape of the VecEnv observation
# buffer stored under the key None, then the declared observation space.
print(obs)
print(env.buf_obs[None].shape, env.observation_space, sep="\n")

# Let the loaded model pick an action and advance the environment one step.
action, _states = model.predict(obs)
obs, rewards, dones, info = env.step(action)

# Dump everything the step returned, one value per line.
print(obs, rewards, dones, info, sep="\n")

# while True:
#     action, _states = model.predict(obs)
#     obs, rewards, dones, info = env.step(action)
#     env.render()